Merge branch 'master' into sebb/build_using_tar_dir
diff --git a/Dockerfile b/Dockerfile
index 3c180c1..23f5e67 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -47,13 +47,9 @@
 # Copy the current ASF code
 WORKDIR /tmp/pelican-asf
 # copy only the GFM build code initially, to reduce rebuilds
-COPY bin bin
+COPY bin/build-cmark.sh bin/build-cmark.sh
 # build gfm
 RUN ./bin/build-cmark.sh | grep LIBCMARKDIR > LIBCMARKDIR.sh
-# we also need the plugins
-COPY plugins plugins
-# we may need to explain how to create a pelicanconf.yaml
-COPY pelicanconf.md pelicanconf.md
 
 # Standard Pelican stuff
 # rebase the image to save up to 230MB of image size
@@ -68,23 +64,29 @@
 # we likely do not need the following
 # RUN apt install wget unzip fontconfig -y
 
-ARG PELICAN_VERSION=4.6.0
-ARG MATPLOTLIB_VERSION=3.4.1
+ARG PELICAN_VERSION=4.7.0
 RUN pip install pelican==${PELICAN_VERSION}
-RUN pip install matplotlib==${MATPLOTLIB_VERSION}
 
 # Copy the built cmark and ASF 
 WORKDIR /tmp/pelican-asf
 COPY --from=pelican-asf /tmp/pelican-asf .
 
 COPY requirements.txt .
+# Don't automatically load dependencies; please add them to requirements.txt instead
 RUN pip install -r requirements.txt --no-deps
 
+# Now add the local code; do this last to avoid unnecessary rebuilds
+COPY bin bin
+COPY plugins plugins
+
 # If the site needs authtokens to build, copy them into the file .authtokens
 # and it will be picked up at build time
 # N.B. make sure the .authtokens file is not committed to the repo!
 RUN ln -s /site/.authtokens /root/.authtokens
 
+# buildsite.py expects python to be here:
+RUN ln -s /usr/local/bin/python3 /usr/bin/python3
+
 # Run Pelican
 WORKDIR /site
 
diff --git a/README.md b/README.md
index 2ad2782..db60a47 100644
--- a/README.md
+++ b/README.md
@@ -1,35 +1,14 @@
-# Tools for using Pelican at the ASF
+Title: README
 
-_TBD_
+## Tools for using Pelican at the ASF
 
-## Step One: Build libcmark-gfm
+The infrastructure-pelican repository provides a customized process
+for working with Pelican-based websites at the ASF. 
 
-```
-$ mkdir /tmp/cm
-$ cd /tmp/cm
-$ /path/to/infrastructure-pelican/bin/build-cmark.sh
-... (build output here)
-export LIBCMARKDIR='/tmp/cm/cmark-gfm-0.28.3.gfm.12/lib'
-$
-```
+See the <a href="https://infra.apache.org/asf-pelican-gettingstarted.html" target="_blank">getting started guide</a> for working with the ASF Pelican template.
 
-Copy/paste/execute that printed `export` line for use in the following steps.
+## Running Local Preview Builds
 
-(of course, you may use any location of your choice; `/tmp/cm` is
-merely an example)
+See the instructions for <a href="https://infra.apache.org/asf-pelican-local.html" target="_blank">local Pelican builds</a>.
 
-### Installing libcmark-gfm via packages
-
-_TBD: install a .deb from packages.apache.org_
-
-_TBD: maybe a macOS variant?_
-
-_TBD: maybe Windows?_
-
-## Step Two
-
-_TBD: credentials need to provided in (eg.) bb2.txt_
-
-```
-$ ./kick_build.py --repo=www-site --notify=somewhere@example.com --theme theme/apache --min-pages=200
-```
+Contact `users@infra.apache.org` for any questions or comments.
diff --git a/bin/build-cmark.sh b/bin/build-cmark.sh
index 7917bba..68be098 100755
--- a/bin/build-cmark.sh
+++ b/bin/build-cmark.sh
@@ -30,7 +30,7 @@
 # this is checked at the start of the build
 OUTPUTDIR=${3:-.}
 
-ARCHIVES="https://github.com/github/cmark/archive"
+ARCHIVES="https://github.com/github/cmark-gfm/archive/refs/tags"
 LOCAL="${TARDIR}/cmark-gfm.$VERSION.orig.tar.gz"
 
 # Follow redirects, and place the result into known name $LOCAL
diff --git a/bin/buildsite.py b/bin/buildsite.py
index b6c1063..b83de51 100755
--- a/bin/buildsite.py
+++ b/bin/buildsite.py
@@ -1,4 +1,12 @@
 #!/usr/bin/env python3
+#
+# To run this in dev/test, LIBCMARKDIR must be defined in the
+# environment.
+#
+# $ export LIBCMARKDIR=/path/to/cmark-gfm.0.28.3.gfm.12/lib
+#
+# ### see build-cmark.sh for building the lib
+#
 
 import sys
 
@@ -25,16 +33,15 @@
 # Command definitions - put into a conf later on?
 GIT             = '/usr/bin/git'
 SVN             = '/usr/bin/svn'
+BASH            = '/bin/bash'
 PELICANFILES    = '/home/buildslave/slave/tools'
 SCRATCH_DIR     = '/tmp'
 PLUGINS         = '/opt/infrastructure-pelican/plugins'
-
 VERSION         = '0.28.3.gfm.12'
 LIBCMARKDIR     = f'/usr/local/asfpackages/cmark-gfm/cmark-gfm-{VERSION}/lib'
 if not os.path.exists(LIBCMARKDIR):
     # Fail, if a path to the CMARK library is not in ENVIRON.
     LIBCMARKDIR = os.environ['LIBCMARKDIR']
-
 THIS_DIR        = os.path.abspath(os.path.dirname(__file__))
 
 IS_PRODUCTION   = os.path.exists(PELICANFILES)
@@ -43,14 +50,20 @@
 AUTO_SETTINGS_YAML = 'pelicanconf.yaml'
 AUTO_SETTINGS_TEMPLATE = 'pelican.auto.ezt'
 AUTO_SETTINGS = 'pelican.auto.py'
-AUTO_SETTINGS_HELP = 'pelicanconf.md'
+AUTO_SETTINGS_HELP = 'https://github.com/apache/infrastructure-pelican/blob/master/pelicanconf.md'
+
+# default config file name
+PELICAN_CONF = 'pelicanconf.py'
+class _helper:
+    def __init__(self, **kw):
+        vars(self).update(kw)
 
 
 def start_build(args):
     """ The actual build steps """
 
     path = os.path.join(SCRATCH_DIR, args.project)
-    
+
     # Set up virtual environment
     print("Setting up virtual python environment in %s" % path)
     venv.create(path, clear=True, symlinks=True, with_pip=False)
@@ -61,6 +74,16 @@
     subprocess.run((GIT, 'clone', '--branch', args.sourcebranch, '--depth=1', '--no-single-branch', args.source, sourcepath),
                    check=True)
 
+    # Check for minimum page count setting in .asf.yaml, which overrides if cli arg is 0 - INFRA-24226.
+    minimum_page_count = args.count
+    asfyaml_path = os.path.join(sourcepath, '.asf.yaml')
+    if os.path.isfile(asfyaml_path):
+        asfyaml = yaml.safe_load(open(asfyaml_path))
+        pelican_asfyaml_section = asfyaml.get("pelican", {})
+        if pelican_asfyaml_section and minimum_page_count <= 0:
+            minimum_page_count = pelican_asfyaml_section.get("minimum_page_count", minimum_page_count)
+
+
     # Activate venv and install pips if needed. For dev/test, we will
     # assume that all requirements are available at the system level,
     # rather than needing to install them into the venv.
@@ -69,7 +92,7 @@
     ### production buildbot is not difficult to correct.
     if IS_PRODUCTION and os.path.exists(os.path.join(sourcepath, 'requirements.txt')):
         print("Installing pips")
-        subprocess.run(('/bin/bash', '-c',
+        subprocess.run((BASH, '-c',
                         'source bin/activate; pip3 install -r source/requirements.txt'),
                        cwd=path, check=True)
     else:
@@ -82,6 +105,24 @@
         tool_dir = THIS_DIR
     print("TOOLS:", tool_dir)
 
+    ### content_dir isn't quite right either. generate_settings() needs a
+    ### better definition of its sourcepath. And we need a proper definition
+    ### of content_dir to pass to PELICAN.
+    ### gonna brute force for now, to validate some thinking, then refine.
+
+    ### content_dir is where the PAGES are located
+    ### settings_dir is the root of themes and plugins
+
+    # Where is the content located?
+    ### for now, just look for some possibilities. This should come from
+    ### the .yaml or something.
+    content_dir = os.path.join(sourcepath, 'content')
+    settings_dir = sourcepath
+    if not os.path.exists(content_dir):
+        content_dir = os.path.join(sourcepath, 'site')
+        assert os.path.exists(content_dir)
+        settings_dir = content_dir
+
     pelconf_yaml = os.path.join(sourcepath, AUTO_SETTINGS_YAML)
     if os.path.exists(pelconf_yaml):
         settings_path = os.path.join(path, AUTO_SETTINGS)
@@ -89,10 +130,10 @@
             builtin_plugins = PLUGINS
         else:
             builtin_plugins = os.path.join(tool_dir, os.pardir, 'plugins')
-        generate_settings(pelconf_yaml, settings_path, [ builtin_plugins ], sourcepath)
+        generate_settings(pelconf_yaml, settings_path, [ builtin_plugins ], settings_dir)
     else:
         # The default name, but we'll pass it explicitly.
-        settings_path = os.path.join(sourcepath, 'pelicanconf.py')
+        settings_path = os.path.join(sourcepath, PELICAN_CONF)
 
         # Set currently supported plugins
         ### this needs to be removed, as it is too indeterminate.
@@ -107,10 +148,10 @@
     # Call pelican
     buildpath = os.path.join(path, 'build/output')
     os.makedirs(buildpath, exist_ok = True)
-    buildcmd = ('/bin/bash', '-c',
+    buildcmd = (BASH, '-c',
                 'source bin/activate; cd source && '
                 ### note: adding --debug can be handy
-                f'(pelican content --settings {settings_path} -o {buildpath})',
+                f'(pelican {content_dir} --settings {settings_path} -o {buildpath})',
                 )
     print("Building web site with:", buildcmd)
     env = os.environ.copy()
@@ -119,8 +160,8 @@
 
     count = len(glob.glob(f'{buildpath}/**/*.html', recursive=True))
     print(f"{count} html files.")
-    if args.count > 0 and args.count > count:
-        print("Not enough html pages in the Web Site. Minimum %s > %s found in the Web Site." % (args.count, count))
+    if minimum_page_count > 0 and minimum_page_count > count:
+        print("Not enough html pages in the Web Site. Minimum %s > %s found in the Web Site." % (minimum_page_count, count))
         sys.exit(4)
 
     # Done for now
@@ -189,41 +230,48 @@
 
 def build_dir(args):
 
-    path = sourcepath = '.'
+    # Where to place the automatically-generated AUTO_SETTINGS file (pelican.auto.py)
+    auto_dir = '.'
+
+    # Where is the YAML file?
+    yaml_dir = args.yaml_dir
+
+    # Where is the content located?
+    content_dir = args.content_dir
 
     # Where are our tools?
     tool_dir = THIS_DIR
     print("TOOLS:", tool_dir)
 
-    pelconf_yaml = os.path.join(sourcepath, AUTO_SETTINGS_YAML)
+    pelconf_yaml = os.path.join(yaml_dir, AUTO_SETTINGS_YAML)
     if os.path.exists(pelconf_yaml):
-        settings_path = os.path.join(path, AUTO_SETTINGS)
+        settings_path = os.path.join(auto_dir, AUTO_SETTINGS)
         builtin_plugins = os.path.join(tool_dir, os.pardir, 'plugins')
-        generate_settings(pelconf_yaml, settings_path, [ builtin_plugins ], sourcepath)
+        generate_settings(pelconf_yaml, settings_path, [ builtin_plugins ])
+    elif os.path.exists(os.path.join(yaml_dir, PELICAN_CONF)):
+        settings_path = os.path.join(yaml_dir, PELICAN_CONF)
     else:
-        # The default name, but we'll pass it explicitly.
-        settings_path = os.path.join(sourcepath, 'pelicanconf.py')
-        print(f'You must convert {settings_path} to {pelconf_yaml}')
-        help_path = os.path.join(tool_dir, os.pardir, AUTO_SETTINGS_HELP)
-        with open(help_path, encoding='utf-8') as f:
-            print(f.read())
+        print(f'ERROR: {pelconf_yaml} is missing')
+        print(f'  see: {AUTO_SETTINGS_HELP}')
         sys.exit(4)
 
+
     if args.listen:
         pel_options = '-r -l -b 0.0.0.0'
     else:
         pel_options = ''
 
     # Call pelican
-    buildcmd = ('/bin/bash', '-c',
+    buildcmd = (BASH, '-c',
                 ### note: adding --debug can be handy
-                f'(pelican content --settings {settings_path} --o {args.output} {pel_options})',
+                f'(pelican {content_dir} --settings {settings_path} --o {args.output} {pel_options})',
                 )
     print("Building web site with:", buildcmd)
     env = os.environ.copy()
     env['LIBCMARKDIR'] = LIBCMARKDIR
     try:
-        subprocess.run(buildcmd, cwd=path, check=True, env=env)
+        ### is the cwd necessary?
+        subprocess.run(buildcmd, cwd=auto_dir, check=True, env=env)
     except KeyboardInterrupt:
         pass
 
@@ -237,30 +285,58 @@
         'theme': os.path.join(sourcepath, ydata.get('theme', 'theme/apache')),
         'debug': str(ydata.get('debug', False)),
         })
+
+    content = ydata.get('content', { })
+    tdata['pages'] = content.get('pages')
+    tdata['static'] = content.get('static_dirs', [ '.', ])
+
     tdata['p_paths'] = builtin_p_paths
     tdata['use'] = ['gfm']
+
+    tdata['uses_sitemap'] = None
     if 'plugins' in ydata:
         if 'paths' in ydata['plugins']:
             for p in ydata['plugins']['paths']:
                 tdata['p_paths'].append(os.path.join(sourcepath, p))
+
         if 'use' in ydata['plugins']:
             tdata['use'] = ydata['plugins']['use']
 
-    if 'genid' in ydata:
-        class GenIdParams:
-            def setbool(self, name):
-                setattr(self, name, str(ydata['genid'].get(name, False)))
-            def setdepth(self, name):
-                setattr(self, name, ydata['genid'].get(name))
+        if 'sitemap' in ydata['plugins']:
+            sm = ydata['plugins']['sitemap']
+            sitemap_params =_helper(
+                    exclude=str(sm['exclude']),
+                    format=sm['format'],
+                    priorities=_helper(
+                        articles=sm['priorities']['articles'],
+                        indexes=sm['priorities']['indexes'],
+                        pages=sm['priorities']['pages'],
+                        ),
+                    changefreqs=_helper(
+                        articles=sm['changefreqs']['articles'],
+                        indexes=sm['changefreqs']['indexes'],
+                        pages=sm['changefreqs']['pages'],
+                        ),
+                    )
 
-        genid = GenIdParams()
-        genid.setbool('unsafe')
-        genid.setbool('metadata')
-        genid.setbool('elements')
-        genid.setbool('permalinks')
-        genid.setbool('tables')
-        genid.setdepth('headings_depth')
-        genid.setdepth('toc_depth')
+            tdata['uses_sitemap'] = 'yes'  # ezt.boolean
+            tdata['sitemap'] = sitemap_params
+            tdata['use'].append('sitemap')  # add the plugin
+
+    tdata['uses_index'] = None
+    if 'index' in tdata:
+        tdata['uses_index'] = 'yes'  # ezt.boolean
+
+    if 'genid' in ydata:
+        genid = _helper(
+                unsafe=str(ydata['genid'].get('unsafe', False)),
+                metadata=str(ydata['genid'].get('metadata', False)),
+                elements=str(ydata['genid'].get('elements', False)),
+                permalinks=str(ydata['genid'].get('permalinks', False)),
+                tables=str(ydata['genid'].get('tables', False)),
+                headings_depth=ydata['genid'].get('headings_depth'),
+                toc_depth=ydata['genid'].get('toc_depth'),
+                )
 
         tdata['uses_genid'] = 'yes'  # ezt.boolean()
         tdata['genid'] = genid
@@ -275,7 +351,7 @@
     tdata['uses_copy'] = None
     if 'setup' in ydata:
         sdata = ydata['setup']
-        
+
         # Load data structures into the pelican METADATA.
         if 'data' in sdata:
             tdata['uses_data'] = 'yes'  # ezt.boolean()
@@ -335,7 +411,7 @@
         print("ERROR: Could not acquire lock for project directory - is another build taking ages to complete?!")
         sys.exit(-1)
 
-    
+
 def main():
     #os.chdir('/tmp/nowhere')  ### DEBUG: make sure we aren't reliant on cwd
 
@@ -358,6 +434,8 @@
     parser_dir = subparsers.add_parser("dir", help = "Build source in current directory and optionally serve the result")
     parser_dir.add_argument("--output", help = "Pelican output path (default: %(default)s)", default = "site-generated")
     parser_dir.add_argument("--listen", help = "Pelican build in server mode (default: %(default)s)", action = "store_true")
+    parser_dir.add_argument('--yaml-dir', help='Where pelicanconf.yaml is located (default: %(default)s)', default='.')
+    parser_dir.add_argument('--content-dir', help='Where is the content located (default: %(default)s)', default='content')
     parser_dir.set_defaults(func=build_dir)
 
     args = parser.parse_args()
diff --git a/bin/kick_build.py b/bin/kick_build.py
index 1f73b70..fcebc7a 100755
--- a/bin/kick_build.py
+++ b/bin/kick_build.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+# Trigger a buildbot run
+# Defaults to the scheduler 'pelican_websites'
+
 import argparse
 import re
 
@@ -22,15 +25,15 @@
 
 # The schedule/host we need to kick for a rebuild.
 ### maybe parameterize?
-SCHEDULER_NAME = 'pelican_websites'
+SCHEDULER_NAME_DEFAULT = 'pelican_websites'
 API_HOST = 'ci2.apache.org'
 
 
-def main(repo, sourcebranch, outputbranch, theme, notify, min_pages):
+def main(repo, sourcebranch, outputbranch, theme, notify, min_pages, scheduler_name):
 
     # Never build from asf-site.
     assert sourcebranch != 'asf-site'
-    
+
     # Infer project name from the repository name.
     ### this code and WSMAP should be centralized.
     m = re.match(r"(?:incubator-)?([^-.]+)", repo)
@@ -59,7 +62,7 @@
         },
     }
     print('Triggering pelican build...')
-    s.post(f'https://{API_HOST}/api/v2/forceschedulers/{SCHEDULER_NAME}', json=payload)
+    s.post(f'https://{API_HOST}/api/v2/forceschedulers/{scheduler_name}', json=payload)
 
 
 if __name__ == '__main__':
@@ -77,8 +80,10 @@
                         help='Where to email the build result message.')
     parser.add_argument('--min-pages', type=int, default=0,
                         help='Minimum number of generated pages.')
+    parser.add_argument('--scheduler-name', default=SCHEDULER_NAME_DEFAULT,
+                        help='Name of scheduler to trigger')
 
     args = parser.parse_args()
     print('ARGS:', args)
     main(args.repo, args.sourcebranch, args.outputbranch, args.theme,
-         args.notify, args.min_pages)
+         args.notify, args.min_pages, args.scheduler_name)
diff --git a/bin/local-pelican-site.sh b/bin/local-pelican-site.sh
new file mode 100755
index 0000000..a47a1d2
--- /dev/null
+++ b/bin/local-pelican-site.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+
+# Create a local Pelican build of an infrastructure-pelican-based site
+# and deploy it at http://localhost:8000
+#
+# requires pip3/python3, cmake, and a C compiler
+# known to work on linux/osx. probably works under WSL. 
+# will not work under basic Windows.
+
+# github prefix for cloning/updating repos
+GH="https://github.com/apache"
+
+# site_build directory path. use a /tmp dir by default
+SB="$HOME/pelican-local"
+
+# infrastructure-pelican repo
+IP="infrastructure-pelican"
+
+# build target site repo minus the .git suffix
+REPO=`basename $1 .git`
+
+echo "Using GitHub prefix: $GH"
+if [ "$1" = "" ] || [ $# -gt 1 ];
+then
+  echo "Usage: $0 site-repo"
+  echo "Example: $0 infrastructure-website"
+  exit -1
+fi
+
+echo "Starting build for $REPO"
+
+# make sure our tools exist
+echo "Checking dependencies..."
+if ! command -v cmake &> /dev/null
+then
+  echo "cmake not found! you need to install the cmake package"
+  exit -1
+elif ! command -v python3 &> /dev/null
+then
+  echo "python3 not found! you need to install the python3 package"
+  exit -1
+elif ! command -v pip3 &> /dev/null
+then
+  echo "pip3 not found! you need to install the pip3 package"
+  exit -1
+elif ! command -v pipenv &> /dev/null
+then
+  echo "pipenv not found! installing it for you..."
+  pip3 install pipenv > /dev/null 2>&1
+  if [ $? -eq 1 ];
+  then 
+    echo "pipenv installation failed!" 
+    exit -1
+  fi
+fi
+
+# create our build dir to hold our repos and cmark-gfm 
+if [ ! -d $SB ];
+then
+  mkdir $SB || 'echo "Creation of $SB failed!" && exit -1'
+  cd $SB
+else
+  cd $SB
+fi
+
+
+# clone or update the pelican and site repos as needed
+echo "Cloning repos..."
+
+if [ -d $IP ];
+then
+  echo "$IP exists - updating..."
+  cd $IP && git pull > /dev/null && cd .. 
+else
+  echo "Cloning $IP"
+  # Sometimes useful to add -b <branch> for buildsite testing
+  git clone $GH/$IP 2>&1 
+fi
+
+IP="$SB/$IP"
+
+if [ -d $REPO ];
+then
+  echo "$REPO exists - not updating in case there are local changes!"
+  echo "Perform a manual git pull to sync with upstream $REPO"
+  # cd $REPO && git pull > /dev/null && cd ..
+else
+  echo "Cloning $REPO"
+  git clone $GH/$REPO 2>&1
+fi
+REPO="$SB/$REPO"
+# deploy our pipenv if we haven't already
+# TBD: check timestamp on $IP/requirements.txt and auto-update pipenv deps
+# right now that process is manual
+
+if [ ! -f "Pipfile.lock" ];
+then
+  echo "Setting up pipenv..."
+  pipenv --three install -r $IP/requirements.txt > /dev/null 2>&1 || 'echo "pipenv install failed!" && exit -1'
+
+else
+  echo "Pipfile.lock found, assuming pipenv exists."
+  echo "Run pipenv install -r $IP/requirements.txt to update dependencies if needed."
+fi
+
+# figure out what version of cmark-gfm we need to use
+echo "Extracting cmark version..."
+VERSION=`grep ^VERSION ./infrastructure-pelican/bin/build-cmark.sh | cut -d '=' -f 2`
+
+# if we already built this version of cmark, don't build it again
+if [ $VERSION ];
+then
+  echo "Found version $VERSION"
+else
+  echo "cmark-gfm version string not found! this shouldn't happen."
+  exit -1
+fi
+
+if [ -d "cmark-gfm-$VERSION" ];
+then
+  echo "Using existing ${PWD}/cmark-gfm-$VERSION/lib"
+  export LIBCMARKDIR=${PWD}/cmark-gfm-$VERSION/lib
+else
+  echo "Building cmark-gfm..."
+  eval `./infrastructure-pelican/bin/build-cmark.sh 2>&1 | grep export | grep -v echo `
+fi
+
+# run the site build/deploy in our pipenv environment
+
+# Clean
+if [ -d "$(realpath $REPO)/site-generated" ] && [ -f "$(realpath $REPO)/pelican.auto.py" ];
+then
+  echo "Generated local site exists! Removing..."
+  rm -rf $(realpath $REPO)/site-generated $(realpath $REPO)/pelican.auto.py
+fi
+
+# Build
+cd $REPO
+pipenv run python3 $(realpath $IP)/bin/buildsite.py dir --yaml-dir $(realpath $REPO) --content-dir "$(realpath $REPO)/content"
+
+# Serve
+pipenv run python3 -m pelican content --settings $(realpath $REPO)/pelican.auto.py --o $(realpath $REPO)/site-generated -r -l -b 0.0.0.0
+
diff --git a/bin/pelican.auto.ezt b/bin/pelican.auto.ezt
index abc1b1c..9a40475 100644
--- a/bin/pelican.auto.ezt
+++ b/bin/pelican.auto.ezt
@@ -20,13 +20,15 @@
 PLUGINS = [ [for use]'[use]', [end] ]
 
 # All content is located at '.' (aka content/ )
-PAGE_PATHS = [ '.' ]
-STATIC_PATHS = [ '.' ]
+PAGE_PATHS = [ '[if-any pages][pages][else].[end]' ]
+STATIC_PATHS = [ [for static]'[static]', [end] ]
 
 # Where to place/link generated pages
+[if-any pages]
+PATH_METADATA = '[pages]/(?P<path_no_ext>.*)\\..*'
+[else]
 PATH_METADATA = '(?P<path_no_ext>.*)\\..*'
-### some sites have the pages in a subdir. TBD.
-#PATH_METADATA = 'pages/(?P<path_no_ext>.*)\\..*'
+[end]
 
 PAGE_SAVE_AS = '{path_no_ext}.html'
 
@@ -42,7 +44,11 @@
 ARCHIVES_SAVE_AS = ''
 
 # Disable articles by pointing to a (should-be-absent) subdir
-ARTICLE_PATHS = [ 'articles' ]
+ARTICLE_PATHS = [ 'blog' ]
+
+# needed to create blogs page
+ARTICLE_URL = 'blog/{slug}.html'
+ARTICLE_SAVE_AS = 'blog/{slug}.html'
 
 # Disable all processing of .html files
 READERS = { 'html': None, }
@@ -70,6 +76,24 @@
     'debug': [debug],
 }
 [end]
+
+[if-any uses_sitemap]
+SITEMAP = {
+    "exclude": [sitemap.exclude],
+    "format": "[sitemap.format]",
+    "priorities": {
+        "articles": [sitemap.priorities.articles],
+        "indexes": [sitemap.priorities.indexes],
+        "pages": [sitemap.priorities.pages]
+    },
+    "changefreqs": {
+        "articles": "[sitemap.changefreqs.articles]",
+        "indexes": "[sitemap.changefreqs.indexes]",
+        "pages": "[sitemap.changefreqs.pages]"
+    }
+}
+[end]
+
 [if-any uses_data]
 # Configure the asfdata plugin
 ASF_DATA = {
@@ -91,3 +115,9 @@
 [if-any uses_copy]
 ASF_COPY = [ [for copy]'[copy]', [end] ]
 [end]
+[if-any uses_index]
+# Configure the asfindex plugin
+ASF_INDEX = {
+    'index': '[index]',
+}
+[end]
diff --git a/pelicanconf.md b/pelicanconf.md
index 9c484d4..fc77449 100644
--- a/pelicanconf.md
+++ b/pelicanconf.md
@@ -1,74 +1,3 @@
 # Configuring Pelican ASF
 
-Conversion of pelicanconf.py to pelicanconf.yaml.
-
-See github.com/apache/template-site and inspect a full pelicanconf.yaml
-
-These are the sections:
-
-## Required
-
-```
-site:
-  name: Apache Template
-  description: Provides a template for projects wishing to use the Pelican ASF static content system
-  domain: template.apache.org
-  logo: images/logo.png
-  repository: https://github.com/apache/template-site/blob/main/content/
-  trademarks: Apache, the Apache feather logo, and "Project" are trademarks or registered trademarks
-
-theme: theme/apache
-```
-
-## Options
-
-### Plugins
-
-If you are using the standard plugins included in Pelican ASF then you can leave this section out.
-Your build will automatically include the `gfm` plugin.
-
-```
-plugins:
-  paths:
-    - theme/plugins
-  use:
-    - gfm
-```
-
-### Special setup
-
-These configure four different special features.
-
-```
-setup:
-  data: asfdata.yaml
-  run:
-    - /bin/bash shell.sh
-  ignore:
-    - README.md
-    - include
-    - docs
-  copy:
-    - docs
-```
-
-1. data - uses `asfdata` plugin to build a data model to use in `ezmd` files. www-site is the best example.
-2. run - uses `asfshell` plugin to run scripts. httpd-site's security vulnerability processing is the best example.
-3. ignore - sets Pelican's IGNORE_FILES setting.
-4. copy - uses `asfcopy` plugin to copy static files outside of the pelican process. Include these in ignore as well.
-   This is useful if you have large files or many static files.
-
-## Generate ID
-
-The `asfgenid` plugin performs a number of fixups and enhancements. See ASF_GENID in your `pelicanconf.py` and convert.
-
-```
-genid:
-  unsafe: yes
-  metadata: yes
-  elements: yes
-  headings_depth: 4
-  permalinks: yes
-  toc_depth: 4
-  tables: yes
-```
+See <a href="https://infra.apache.org/asf-pelican-config.html" target="_blank">Configuring ASF Pelican</a> for configuration details.
diff --git a/plugins/age_days_lt/__init__.py b/plugins/age_days_lt/__init__.py
new file mode 100644
index 0000000..dd32091
--- /dev/null
+++ b/plugins/age_days_lt/__init__.py
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+The age_in_days plugin adds a Jinja test, age_days_lt.
+
+It is intended to be used in Pelican templates like this to select articles newer than 90 days:
+
+    {% for article in (articles | selectattr("date", "age_days_lt", 90) ) %}
+        ...
+    {% endif %}
+"""
+from pelican import signals
+from . import agedayslt
+
+def add_test(pelican):
+    """Add age_days_lt test to Pelican."""
+    pelican.env.tests.update({'age_days_lt': agedayslt.age_days_lt})
+
+
+def register():
+    """Plugin registration."""
+    signals.generator_init.connect(add_test)
diff --git a/plugins/age_days_lt/agedayslt.py b/plugins/age_days_lt/agedayslt.py
new file mode 100644
index 0000000..c191df5
--- /dev/null
+++ b/plugins/age_days_lt/agedayslt.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from datetime import datetime
+
+def age_days_lt(dt, days):
+    """Return true if a number of days since 'dt' < 'days'"""
+    now = datetime.now(dt.tzinfo)
+    delta = now - dt
+    return delta.days < days
diff --git a/plugins/asfdata.py b/plugins/asfdata.py
index 5ecdcef..9ef530a 100644
--- a/plugins/asfdata.py
+++ b/plugins/asfdata.py
@@ -36,6 +36,7 @@
 import ezt
 
 import xml.dom.minidom
+import xml.parsers.expat
 
 import pelican.plugins.signals
 import pelican.utils
@@ -47,6 +48,8 @@
     (re.compile(r'&gt;'), '>'),
 ]
 
+# Format of svn ls -v output: Jan 1 1970
+SVN_DATE_FORMAT = "%b %d %Y"
 
 # read the asfdata configuration in order to get data load and transformation instructions.
 def read_config(config_yaml, debug):
@@ -321,6 +324,10 @@
         else:
             print(f'{seq} - split requires an existing sequence to split')
 
+    if 'truncate' in sequence:
+        multiple = int(sequence["truncate"])
+        reference = int(reference / multiple) * multiple
+
     # if this not already a sequence or dictionary then convert to a sequence
     if not is_sequence and not is_dictionary:
         # convert the dictionary/list to a sequence of objects
@@ -330,12 +337,14 @@
             reference = sequence_dict(seq, reference)
         elif isinstance(reference, list):
             reference = sequence_list(seq, reference)
-        else:
-            print(f'{seq}: cannot proceed invalid type, must be dict or list')
 
     # save sequence in metadata
     if save_metadata:
         metadata[seq] = reference
+        try:
+          metadata[f'{seq}_size'] = len(reference)
+        except TypeError: # allow for integer
+          pass
 
 
 # create metadata sequences and dictionaries from a data load
@@ -394,13 +403,13 @@
             # user = listing[1]
             if listing[-6] == '':
                 # dtm in the past year
-                dtm1 = datetime.datetime.strptime(" ".join(listing[-4:-2]) + " " + str(gatherYear), "%b %d %Y")
+                dtm1 = datetime.datetime.strptime(" ".join(listing[-4:-2]) + " " + str(gatherYear), SVN_DATE_FORMAT)
                 if dtm1 > gatherDate:
-                    dtm1 = datetime.datetime.strptime(" ".join(listing[-4:-2]) + " " + str(gatherYear - 1), "%b %d %Y")
+                    dtm1 = datetime.datetime.strptime(" ".join(listing[-4:-2]) + " " + str(gatherYear - 1), SVN_DATE_FORMAT)
                 fsize = listing[-5]
             else:
                 # dtm older than one year
-                dtm1 = datetime.datetime.strptime(" ".join(listing[-5:-1]), "%b %d %Y")
+                dtm1 = datetime.datetime.strptime(" ".join(listing[-5:-1]), SVN_DATE_FORMAT)
                 fsize = listing[-6]
             # date is close enough
             dtm = dtm1.strftime("%m/%d/%Y")
@@ -481,7 +490,7 @@
     """http://www.python.org/doc/2.5.2/lib/minidom-example.txt"""
     rc = ''
     for node in nodelist:
-        if node.nodeType == node.TEXT_NODE:
+        if node.nodeType in [node.CDATA_SECTION_NODE, node.TEXT_NODE]:
             rc = rc + node.data
     return rc
 
@@ -509,11 +518,15 @@
     if debug:
         print(f'blog feed: {feed}')
     content = requests.get(feed).text
-    dom = xml.dom.minidom.parseString(content)
-    # dive into the dom to get 'entry' elements
-    entries = dom.getElementsByTagName('entry')
-    # we only want count many from the beginning
-    entries = entries[:count]
+    # See INFRA-23636: cannot check the page status, so just catch parsing errors
+    try:
+        dom = xml.dom.minidom.parseString(content)
+        # dive into the dom to get 'entry' elements
+        entries = dom.getElementsByTagName('entry')
+        # we only want count many from the beginning
+        entries = entries[:count]
+    except xml.parsers.expat.ExpatError:
+        entries = []
     v = [ ]
     for entry in entries:
         if debug:
@@ -568,17 +581,28 @@
         print(f'-----\ntwitter feed: {handle}')
     bearer_token = twitter_auth()
     if not bearer_token:
-        return sequence_list('twitter',{
+        print('WARN: no bearer token for Twitter')
+        return sequence_list('twitter',[{
             'text': 'To retrieve tweets supply a valid twitter bearer token in ~/.authtokens'
-        })
+        }])
     # do not print or display bearer_token as it is a secret
     query = f'from:{handle}'
     tweet_fields = 'tweet.fields=author_id'
     url = f'https://api.twitter.com/2/tweets/search/recent?query={query}&{tweet_fields}'
     headers = {'Authorization': f'Bearer {bearer_token}'}
     load = connect_to_endpoint(url, headers)
+    result_count = load['meta']['result_count']
+    if result_count == 0:
+        print(f'WARN: No recent tweets for {handle}')
+        return sequence_list('twitter',[{ 'text': 'No recent tweets found' }])
+    if 'data' not in load:
+        print('WARN: "data" not in Twitter response')
+        print(load) # DEBUG; should not happen if result_count > 0
+        return sequence_list('twitter',[{
+            'text': 'Unable to extract Twitter data'
+        }])
     reference = sequence_list('twitter', load['data'])
-    if load['meta']['result_count'] < count:
+    if result_count < count:
         v = reference
     else:
         v = reference[:count]
@@ -589,7 +613,10 @@
 def process_eccn(fname, debug):
     if debug:
         print('-----\nECCN:', fname)
-    j = yaml.safe_load(open(fname))
+    if fname.startswith("https://"):
+        j = yaml.safe_load(requests.get(fname).text)
+    else:
+        j = yaml.safe_load(open(fname))
 
     # versions have zero or more controlled sources
     def make_sources(sources):
diff --git a/plugins/asfindex.py b/plugins/asfindex.py
new file mode 100644
index 0000000..2d32873
--- /dev/null
+++ b/plugins/asfindex.py
@@ -0,0 +1,151 @@
+#!/usr/bin/python -B
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+# asfindex.py - Pelican plugin that generates indexes
+#
+
+import sys
+import subprocess
+import shlex
+import io
+import os
+import os.path
+import traceback
+
+import pelican.plugins.signals
+import pelican.settings
+from pelican.contents import Article, Page, Static
+from pelican.generators import (ArticlesGenerator,  # noqa: I100
+                                PagesGenerator, SourceFileGenerator,
+                                StaticGenerator, TemplatePagesGenerator)
+
+
+# get setting
+#  Settings are for the whole pelican environment.
+def get_setting(generators, setting):
+    try:
+        for g in generators:
+            if isinstance(g, PagesGenerator):
+                return g.settings[setting]
+    except Exception:
+        return None
+
+
+# set context
+#  Context are the processed settings and other environment which is made available to the JINJA template.
+#  Changes to the settings have no effect as those are already copied to each generator's context.
+def set_context(generators, setting, value):
+    for g in generators:
+        if isinstance(g, PagesGenerator):
+            g.context[setting] = value
+            return value
+    return None
+
+
+# get pages
+#  The PagesGenerator has a list of pages. Retrieve a sorted array of page information
+def get_pages(generators):
+    site_index = []
+    for g in generators:
+        if isinstance(g, PagesGenerator):
+            for p in g.pages:
+                # use an absolute path 
+                save_as = '/' + p.save_as
+                if save_as.endswith('/index.html'):
+                    # use "/" for the filename of index.html files, ensuring that they are first in a folder's list
+                    save_as = save_as[:-10]
+                # extract the path name
+                path, page = os.path.split(save_as)
+                site_index.append((path, save_as, p.title))
+    site_index.sort()
+    return site_index
+
+
+# get site index
+def get_index(site_index, scope):
+    current_folder = None
+    started = False
+    site_listing = ''
+    if not scope:
+        return
+    scoped = False
+    if scope != '**':
+        scoped = True
+    for p in site_index:
+        path, page = os.path.split(p[0])
+        folder = page.capitalize()
+        if not scoped or (scoped and p[0].startswith(scope)):
+            if folder != current_folder:
+                if started:
+                    site_listing += '</ol>\n'
+                started = True
+                site_listing += f'<h3><a href="{p[1]}">{p[2]}</a></h3>\n'
+                site_listing += '<ol>\n'
+                current_folder = folder
+            else:
+                # menu item for page
+                site_listing += f'<li><a href="{p[1]}">{p[2]}</a></li>\n'
+    if started:
+        site_listing += '</ol>\n'
+    return site_listing
+
+
+# get site menu
+# def get_menu(site_index, menus):
+#     current_menu = None
+#     site_menu = ''
+#     if menus:
+#         for f in menus:
+#             path, page = os.path.split(f)
+#             folder = page.capitalize()
+#             site_menu += '<li class="nav-item active dropdown">\n'
+#             site_menu += f'<a class="nav-link dropdown-toggle" href="#" id="dropdown{folder}" '
+#             site_menu += f'role="button" data-toggle="dropdown" aria-expanded="false">{folder}</a>\n'
+#             site_menu += f'<ul class="dropdown-menu" aria-labelledby="dropdown{folder}">\n'
+#             for p in site_index:
+#                 if p[0] == f:
+#                     # menu item for page
+#                     site_menu += f'<li><a class="dropdownitem" href="{p[1]}">{p[2]}</a></li>\n'
+#             site_menu += '</ul></li>\n'
+#     return site_menu
+#
+#
+# show pages
+def show_pages(generators):
+    site_index = get_pages(generators)
+    asf_index = get_setting(generators, 'ASF_INDEX')
+    print(asf_index)
+    # Not currently interested in menus this way as it is not generalizable
+    # set_context(generators, 'SITE_MENU', get_menu(site_index, asf_index['menus']))
+    set_context(generators, 'SITE_INDEX', get_index(site_index, asf_index['index']))
+
+
+def tb_finalized(generators):
+    """ Print any exception, before Pelican chews it into nothingness."""
+    try:
+        show_pages(generators)
+    except Exception:
+        print('-----', file=sys.stderr)
+        traceback.print_exc()
+        # exceptions here stop the build
+        raise
+
+
+def register():
+    pelican.plugins.signals.all_generators_finalized.connect(tb_finalized)
diff --git a/plugins/asfrun.py b/plugins/asfrun.py
index d7abce5..42ae89d 100644
--- a/plugins/asfrun.py
+++ b/plugins/asfrun.py
@@ -23,8 +23,6 @@
 import sys
 import subprocess
 import shlex
-import io
-import os
 import traceback
 
 import pelican.plugins.signals
diff --git a/plugins/extract_toc/__init__.py b/plugins/extract_toc/__init__.py
new file mode 100644
index 0000000..52c5778
--- /dev/null
+++ b/plugins/extract_toc/__init__.py
@@ -0,0 +1 @@
+from .extract_toc import *
diff --git a/plugins/extract_toc/extract_toc.py b/plugins/extract_toc/extract_toc.py
new file mode 100644
index 0000000..38f11eb
--- /dev/null
+++ b/plugins/extract_toc/extract_toc.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+"""
+Extract Table of Content
+========================
+A Pelican plugin to extract table of contents (ToC) from `article.content` and
+place it in its own `article.toc` variable for use in templates.
+"""
+
+from os import path
+from bs4 import BeautifulSoup
+from pelican import signals, readers, contents
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def extract_toc(content):
+    if isinstance(content, contents.Static):
+        return
+
+    soup = BeautifulSoup(content._content, 'html.parser')
+    filename = content.source_path
+    extension = path.splitext(filename)[1][1:]
+    toc = None
+
+    # default Markdown reader
+    if not toc and readers.MarkdownReader.enabled and extension in readers.MarkdownReader.file_extensions:
+        toc = soup.find('div', class_='toc')
+        if toc:
+            toc.extract()
+            if len(toc.find_next('ul').find_all('li')) == 0:
+                toc = None
+
+    # default reStructuredText reader
+    if not toc and readers.RstReader.enabled and extension in readers.RstReader.file_extensions:
+        toc = soup.find('div', class_='contents topic')
+        if toc:
+            toc.extract()
+            tag = BeautifulSoup(str(toc), 'html.parser')
+            tag.div['class'] = 'toc'
+            tag.div['id'] = ''
+            p = tag.find('p', class_='topic-title first')
+            if p:
+                p.extract()
+            toc = tag
+
+    # Pandoc reader (markdown and other formats)
+    if 'pandoc_reader' in content.settings['PLUGINS']:
+        try:
+            from pandoc_reader import PandocReader
+        except ImportError:
+            PandocReader = False
+        if not toc and PandocReader and PandocReader.enabled and extension in PandocReader.file_extensions:
+            toc = soup.find('nav', id='TOC')
+
+    if toc:
+        toc.extract()
+        content._content = soup.decode()
+        content.toc = toc.decode()
+        if content.toc.startswith('<html>'):
+            content.toc = content.toc[12:-14]
+
+
+def register():
+    signals.content_object_init.connect(extract_toc)
diff --git a/plugins/jinja2content/__init__.py b/plugins/jinja2content/__init__.py
new file mode 100644
index 0000000..de025c4
--- /dev/null
+++ b/plugins/jinja2content/__init__.py
@@ -0,0 +1 @@
+from .jinja2content import *
diff --git a/plugins/jinja2content/jinja2content.py b/plugins/jinja2content/jinja2content.py
new file mode 100644
index 0000000..44ff18b
--- /dev/null
+++ b/plugins/jinja2content/jinja2content.py
@@ -0,0 +1,67 @@
+"""
+jinja2content.py
+----------------
+Pelican plugin that processes Markdown files as jinja templates.
+"""
+
+from jinja2 import Environment, FileSystemLoader, ChoiceLoader
+import os
+from pelican import signals
+from pelican.readers import MarkdownReader, HTMLReader, RstReader
+from pelican.utils import pelican_open
+from tempfile import NamedTemporaryFile
+
+class JinjaContentMixin:
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # will look first in 'JINJA2CONTENT_TEMPLATES', by default the
+        # content root path, then in the theme's templates
+        local_dirs = self.settings.get('JINJA2CONTENT_TEMPLATES', ['.'])
+        local_dirs = [os.path.join(self.settings['PATH'], folder)
+                      for folder in local_dirs]
+        theme_dir = os.path.join(self.settings['THEME'], 'templates')
+
+        loaders = [FileSystemLoader(_dir) for _dir
+                   in local_dirs + [theme_dir]]
+        if 'JINJA_ENVIRONMENT' in self.settings: # pelican 3.7
+            jinja_environment = self.settings['JINJA_ENVIRONMENT']
+        else:
+            jinja_environment = {
+                'trim_blocks': True,
+                'lstrip_blocks': True,
+                'extensions': self.settings['JINJA_EXTENSIONS']
+            }
+        self.env = Environment(
+            loader=ChoiceLoader(loaders),
+            **jinja_environment)
+
+
+    def read(self, source_path):
+        with pelican_open(source_path) as text:
+            text = self.env.from_string(text).render()
+
+        with NamedTemporaryFile(delete=False) as f:
+            f.write(text.encode())
+            f.close()
+            content, metadata = super().read(f.name)
+            os.unlink(f.name)
+            return content, metadata
+
+
+class JinjaMarkdownReader(JinjaContentMixin, MarkdownReader):
+    pass
+
+class JinjaRstReader(JinjaContentMixin, RstReader):
+    pass
+
+class JinjaHTMLReader(JinjaContentMixin, HTMLReader):
+    pass
+
+def add_reader(readers):
+    for Reader in [JinjaMarkdownReader, JinjaRstReader, JinjaHTMLReader]:
+        for ext in Reader.file_extensions:
+            readers.reader_classes[ext] = Reader
+
+def register():
+    signals.readers_init.connect(add_reader)
diff --git a/plugins/md_inline_extension/__init__.py b/plugins/md_inline_extension/__init__.py
new file mode 100644
index 0000000..2453fe9
--- /dev/null
+++ b/plugins/md_inline_extension/__init__.py
@@ -0,0 +1 @@
+from .inline import *
diff --git a/plugins/md_inline_extension/inline.py b/plugins/md_inline_extension/inline.py
new file mode 100644
index 0000000..0d73b0b
--- /dev/null
+++ b/plugins/md_inline_extension/inline.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+"""
+Markdown Inline Extension For Pelican
+=====================================
+Extends Pelican's Markdown module
+and allows for customized inline HTML
+"""
+
+import os
+import sys
+
+from pelican import signals
+
+try:
+    from . pelican_inline_markdown_extension import PelicanInlineMarkdownExtension
+except ImportError as e:
+    PelicanInlineMarkdownExtension = None
+    print("\nMarkdown is not installed - inline Markdown extension disabled\n")
+
+def process_settings(pelicanobj):
+    """Sets user specified settings (see README for more details)"""
+
+    # Default settings
+    inline_settings = {}
+    inline_settings['config'] = {'[]':('', 'pelican-inline')}
+
+    # Get the user specified settings
+    try:
+        settings = pelicanobj.settings['MD_INLINE']
+    except:
+        settings = None
+
+    # If settings have been specified, add them to the config
+    if isinstance(settings, dict):
+        inline_settings['config'].update(settings)
+
+    return inline_settings
+
+def inline_markdown_extension(pelicanobj, config):
+    """Instantiates a customized Markdown extension"""
+
+    # Instantiate Markdown extension and append it to the current extensions
+    try:
+        if isinstance(pelicanobj.settings.get('MD_EXTENSIONS'), list):  # pelican 3.6.3 and earlier
+            pelicanobj.settings['MD_EXTENSIONS'].append(PelicanInlineMarkdownExtension(config))
+        else:
+            pelicanobj.settings['MARKDOWN'].setdefault('extensions', []).append(PelicanInlineMarkdownExtension(config))
+    except:
+        sys.excepthook(*sys.exc_info())
+        sys.stderr.write("\nError - the pelican Markdown extension failed to configure. Inline Markdown extension is non-functional.\n")
+        sys.stderr.flush()
+
+def pelican_init(pelicanobj):
+    """Loads settings and instantiates the Python Markdown extension"""
+
+    # If there was an error loading Markdown, then do not process any further
+    if not PelicanInlineMarkdownExtension:
+        return
+
+    # Process settings
+    config = process_settings(pelicanobj)
+
+    # Configure Markdown Extension
+    inline_markdown_extension(pelicanobj, config)
+
+def register():
+    """Plugin registration"""
+    signals.initialized.connect(pelican_init)
diff --git a/plugins/md_inline_extension/pelican_inline_markdown_extension.py b/plugins/md_inline_extension/pelican_inline_markdown_extension.py
new file mode 100644
index 0000000..a3a3eb3
--- /dev/null
+++ b/plugins/md_inline_extension/pelican_inline_markdown_extension.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+"""
+Pelican Inline Markdown Extension
+==================================
+An extension for the Python Markdown module that enables
+the Pelican Python static site generator to add inline patterns.
+"""
+
+import markdown
+import re
+
+from markdown.util import etree
+from markdown.util import AtomicString
+
+class PelicanInlineMarkdownExtensionPattern(markdown.inlinepatterns.Pattern):
+    """Inline Markdown processing"""
+
+    def __init__(self, pelican_markdown_extension, tag, pattern):
+        super(PelicanInlineMarkdownExtensionPattern,self).__init__(pattern)
+        self.tag = tag
+        self.config = pelican_markdown_extension.getConfig('config')
+
+    def handleMatch(self, m):
+        node = markdown.util.etree.Element(self.tag)
+        tag_attributes = self.config.get(m.group('prefix'), ('', 'pelican-inline'))
+        tag_class = 'pelican-inline'  # default class
+        tag_style = ''  # default is for no styling
+
+        if isinstance(tag_attributes, tuple):
+            tag_style = tag_attributes[0]
+            tag_class = tag_attributes[1] if len(tag_attributes) > 1 else ''
+        elif isinstance(tag_attributes, str):
+            tag_class = tag_attributes
+
+        if tag_class != '':
+            node.set('class', tag_class)
+        if tag_style!= '':
+            node.set('style', tag_style)
+
+        node.text = markdown.util.AtomicString(m.group('text'))
+
+        return node
+
+class PelicanInlineMarkdownExtension(markdown.Extension):
+    """A Markdown extension enabling processing in Markdown for Pelican"""
+    def __init__(self, config):
+
+        try:
+            # Needed for Markdown versions >= 2.5
+            self.config['config'] = ['{}', 'config for markdown extension']
+            super(PelicanInlineMarkdownExtension,self).__init__(**config)
+        except AttributeError:
+            # Markdown versions < 2.5
+            config['config'] = [config['config'], 'config for markdown extension']
+            super(PelicanInlineMarkdownExtension, self).__init__(config)
+
+    def extendMarkdown(self, md, md_globals):
+        # Regex to detect the configured inline prefixes
+        config = self.getConfig('config')
+        patterns = []
+
+        # Escape each configured key so it can be used as a regex alternative
+        for key in config:
+            patterns.append(re.escape(key))
+
+        inline_regex = r'(?P<prefix>%s)(?P<text>.+?)\2' % ('|'.join(patterns))
+
+        # Process after escapes
+        md.inlinePatterns.add('texthighlight_inlined', PelicanInlineMarkdownExtensionPattern(self, 'span', inline_regex), '>emphasis2')
diff --git a/plugins/regex_replace/__init__.py b/plugins/regex_replace/__init__.py
new file mode 100644
index 0000000..f97c8a3
--- /dev/null
+++ b/plugins/regex_replace/__init__.py
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from .regex_replace import *
diff --git a/plugins/regex_replace/regex_replace.py b/plugins/regex_replace/regex_replace.py
new file mode 100644
index 0000000..bbe544b
--- /dev/null
+++ b/plugins/regex_replace/regex_replace.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Markdown regex_replace filter for pelican
+"""
+from pelican import signals
+import re
+
+# Custom filter method
+def regex_replace(s, find, replace):
+    return re.sub(find, replace, s)
+
+def add_filter(pelican):
+    """Add filter to Pelican."""
+    pelican.env.filters.update({'regex_replace': regex_replace})
+
+def register():
+    """Plugin registration."""
+    signals.generator_init.connect(add_filter)
diff --git a/plugins/spu.py b/plugins/spu.py
new file mode 100644
index 0000000..248406c
--- /dev/null
+++ b/plugins/spu.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This is a collection of simple in-page callable tools for pelican.
+To use a function, use the following syntax in your markdown:
+` spu:command_name("arg1", "arg2", "arg3") `
+
+In HTML, you would do:
+<code> spu:command_name("arg1", "arg2") </code>
+
+command_name must match a respective spu_cmd_* command in python.
+"""
+try:
+    from pelican.plugins import signals
+except ImportError:
+    from pelican import signals
+import pelican.contents
+import requests
+import urllib.parse
+import fnmatch
+import re
+
+# List of subdomains deemed safe for spu:fetch()
+SPU_FETCH_SAFE_DOMAINS = ("*.apache.org",)
+
+
+def spu_cmd_fetch(args: list):
+    """Fetches an external URL and put the content where the call was made"""
+    url = args[0]
+    url_parsed = urllib.parse.urlparse(url)
+    is_safe = any(fnmatch.fnmatch(url_parsed.netloc, pattern) for pattern in SPU_FETCH_SAFE_DOMAINS)
+    if is_safe:
+        print("Fetching external resource " + url)
+        return requests.get(url).text
+    else:
+        print("Not fetching unsafe external resource " + url)
+        return ""
+
+
+def spu_sub(call):
+    my_functions = {k: v for k, v in globals().items() if callable(v) and k.startswith("spu_cmd_")}
+    cmd = call.group(1)
+    args = [x[1] for x in re.findall(r"(['\"]?)(.*?)\1(?:,\s*)?", call.group(2)) if x[1]]
+    fnc = "spu_cmd_" + cmd
+    if fnc in my_functions:
+        return my_functions[fnc](args)
+    return ""
+
+
+def spu_parse(instance: pelican.contents.Page):
+    if instance._content is not None:
+        instance._content = re.sub(
+            r"<code>\s*spu:([_a-z]+)\(((?:(['\"]?)(.*?)\3(?:,\s*)?)*)\s*?\)\s*<\/code>",
+            spu_sub,
+            instance._content,
+            flags=re.UNICODE,
+        )
+
+
+def register():
+    print("Simple Pelican Utils registered.")
+    signals.content_object_init.connect(spu_parse)
diff --git a/requirements.txt b/requirements.txt
index 37c8341..e479112 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 pelican
-#pelican-sitemap
+pelican-sitemap # pelican plugin for sitemap generation
 soupsieve # needed by BeautifulSoup4
 BeautifulSoup4 # needed by several plugins
 ezt # needed by several plugins and buildsite.py