Merge branch 'master' into humbedooh/gfm-simplify
diff --git a/.github/workflows/issue62.yml b/.github/workflows/issue62.yml
new file mode 100644
index 0000000..defe77f
--- /dev/null
+++ b/.github/workflows/issue62.yml
@@ -0,0 +1,42 @@
+name: Unit test issue62 - correct metadata types
+on:
+  push:
+
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true
+      matrix:
+        # Not sure it's worth testing on multiple Pythons
+        python-version: ["3.8"]  # quoted so a version is never read as a YAML float
+    steps:
+      - uses: actions/checkout@v4  # release tag rather than the floating master branch
+        with:
+          persist-credentials: false
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pelican[Markdown]==4.5.4
+          pip install -r requirements.txt
+          pip install 'MarkupSafe<2.1.0' # needed for Pelican 4.5.4
+      - name: Build GFM
+        run: |
+          bin/build-cmark.sh | grep LIBCMARKDIR > libcmark.sh
+      - name: Run test
+        run: |
+          source libcmark.sh
+          cd tests/issue62
+          echo "This is currently expected to fail with:"
+          echo " ERROR: Could not process ./one.md"
+          echo " | 'str' object has no attribute 'tzinfo'"
+          ./compare_meta.sh
diff --git a/.github/workflows/issue82.yml b/.github/workflows/issue82.yml
new file mode 100644
index 0000000..2b83d14
--- /dev/null
+++ b/.github/workflows/issue82.yml
@@ -0,0 +1,35 @@
+name: Unit test issue82 - asfdata corrupts output
+on:
+  push:
+
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true
+      matrix:
+        # Not sure it's worth testing on multiple Pythons
+        python-version: ["3.8"]  # quoted so a version is never read as a YAML float
+    steps:
+      - uses: actions/checkout@v4  # release tag rather than the floating master branch
+        with:
+          persist-credentials: false
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pelican[Markdown]==4.5.4
+          pip install -r requirements.txt
+          pip install 'MarkupSafe<2.1.0' # needed for Pelican 4.5.4
+      - name: Run test
+        run: |
+          cd tests/issue82
+          bash runtest.sh
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
new file mode 100644
index 0000000..6af8e96
--- /dev/null
+++ b/.github/workflows/linting.yml
@@ -0,0 +1,40 @@
+name: Linting and MyPy
+on:
+  push:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true
+      matrix:
+        python-version: ["3.10", "3.8"]  # always quoted: a bare 3.10 would parse as 3.1
+    steps:
+      - uses: actions/checkout@v4  # release tag rather than the floating master branch
+        with:
+          persist-credentials: false
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install mypy
+          pip install types-PyYAML
+          pip install types-requests
+          pip install types-beautifulsoup4
+          pip install pylint
+          pip list
+      - name: Type testing with mypy
+        run: |
+          mypy --cache-dir /tmp/ --install-types
+          mypy --cache-dir /tmp/ --ignore-missing-imports bin/*.py plugins/*.py
+      - name: Testing with pylint
+        run: |
+          pylint bin/*.py plugins/*.py
diff --git a/.github/workflows/postrun.yml b/.github/workflows/postrun.yml
new file mode 100644
index 0000000..0cf9091
--- /dev/null
+++ b/.github/workflows/postrun.yml
@@ -0,0 +1,35 @@
+name: Unit test postrun update
+on:
+  push:
+
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true
+      matrix:
+        # Not sure it's worth testing on multiple Pythons
+        python-version: ["3.8"]  # quoted so a version is never read as a YAML float
+    steps:
+      - uses: actions/checkout@v4  # release tag rather than the floating master branch
+        with:
+          persist-credentials: false
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pelican[Markdown]==4.5.4
+          pip install -r requirements.txt
+          pip install 'MarkupSafe<2.1.0' # needed for Pelican 4.5.4
+      - name: Run test
+        run: |
+          cd tests/postrun
+          bash runtest.sh
diff --git a/Dockerfile b/Dockerfile
index 23f5e67..6411796 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -38,8 +38,12 @@
# cd /tmp/<project>/source
# pelican -b '0.0.0.0' -l
#
+
+# Use the Python version as installed on CI pelican builders (2023-06-02)
+ARG PYTHON_VERSION=3.8.10
+
# Build basic Pelican image
-FROM python:3.9.5-slim-buster as pelican-asf
+FROM python:${PYTHON_VERSION}-slim-buster as pelican-asf
RUN apt update && apt upgrade -y
RUN apt install curl cmake build-essential -y
@@ -54,7 +58,9 @@
# Standard Pelican stuff
# rebase the image to save up to 230MB of image size
# image does not need curl, cmake and build-essential
-FROM python:3.9.5-slim-buster
+# Use the Python version as installed on CI pelican builders (2023-06-02)
+FROM python:${PYTHON_VERSION}-slim-buster
+
RUN apt update && apt upgrade -y
# git is used by `buildsite.py git`
@@ -64,16 +70,21 @@
# we likely do not need the following
# RUN apt install wget unzip fontconfig -y
-ARG PELICAN_VERSION=4.7.0
-RUN pip install pelican==${PELICAN_VERSION}
+# Use the Pelican version as installed on CI pelican builders (2023-06-02)
+ARG PELICAN_VERSION=4.5.4
+# Need markdown as fallback for gfm, as documented in the .asf.yaml Pelican notes [1]
+RUN pip install pelican[markdown]==${PELICAN_VERSION}
+# [1] https://cwiki.apache.org/confluence/display/INFRA/Git+-+.asf.yaml+features#Git.asf.yamlfeatures-PelicanCMS
-# Copy the built cmark and ASF
+# Copy the built cmark and ASF
WORKDIR /tmp/pelican-asf
COPY --from=pelican-asf /tmp/pelican-asf .
COPY requirements.txt .
# Don't automatically load dependencies; please add them to requirements.txt instead
RUN pip install -r requirements.txt --no-deps
+# Could perhaps be added to requirements.txt but that would affect other uses
+RUN pip install 'MarkupSafe<2.1.0' # needed for Pelican 4.5.4
# Now add the local code; do this last to avoid unnecessary rebuilds
COPY bin bin
@@ -90,4 +101,7 @@
# Run Pelican
WORKDIR /site
+# Add settings for interactive use
+RUN { cat /tmp/pelican-asf/LIBCMARKDIR.sh; echo "alias buildsite='/tmp/pelican-asf/bin/buildsite.py dir --listen'"; } >>/root/.bashrc
+
ENTRYPOINT [ "/bin/bash", "-c", "source /tmp/pelican-asf/LIBCMARKDIR.sh && /tmp/pelican-asf/bin/buildsite.py dir --listen" ]
diff --git a/README.md b/README.md
index db60a47..f306a35 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
## Tools for using Pelican at the ASF
The infrastructure-pelican repository provides a customized process
-for working with Pelican-based websites at the ASF.
+for working with Pelican-based websites at the ASF.
See the <a href="https://infra.apache.org/asf-pelican-gettingstarted.html" target="_blank">getting started guide</a> for working with the ASF Pelican template.
diff --git a/bin/build-cmark.sh b/bin/build-cmark.sh
index cc35b6c..aece872 100755
--- a/bin/build-cmark.sh
+++ b/bin/build-cmark.sh
@@ -21,7 +21,7 @@
TARDIR="."
if [ "$2" != "" ]; then TARDIR="$2"; fi
-ARCHIVES="https://github.com/github/cmark/archive"
+ARCHIVES="https://github.com/github/cmark-gfm/archive/refs/tags"
LOCAL="${TARDIR}/cmark-gfm.$VERSION.orig.tar.gz"
# WARNING: this must agree with the parent directory in the tar file or the build will fail
diff --git a/bin/buildsite.py b/bin/buildsite.py
index b83de51..a393d42 100755
--- a/bin/buildsite.py
+++ b/bin/buildsite.py
@@ -134,24 +134,35 @@
else:
# The default name, but we'll pass it explicitly.
settings_path = os.path.join(sourcepath, PELICAN_CONF)
+ #print('SETTINGS_PATH:', settings_path)
+ if args.plugins:
+ ppaths = (f'PLUGIN_PATHS=["{args.plugins}",'
+ f' "{sourcepath}/theme/plugins"]')
+ else:
+ ppaths = ''
# Set currently supported plugins
### this needs to be removed, as it is too indeterminate.
with open(settings_path, 'a') as f:
- f.write("""
+ f.write(f"""
+{ppaths}
try:
PLUGINS += ['toc']
-except:
+except Exception: # TODO: narrow further to expected Exceptions
PLUGINS = ['toc', 'gfm']
""")
+ # --debug means exception traces are shown
+ # TODO: would like to be able to set this from the yaml settings file
+ dbg = '--debug' if args.debug else ''
+ delout = '--delete-output-directory' if args.delete else ''
+
# Call pelican
buildpath = os.path.join(path, 'build/output')
os.makedirs(buildpath, exist_ok = True)
buildcmd = (BASH, '-c',
'source bin/activate; cd source && '
- ### note: adding --debug can be handy
- f'(pelican {content_dir} --settings {settings_path} -o {buildpath})',
+ f'(pelican {content_dir} --settings {settings_path} -o {buildpath} {dbg} {delout})',
)
print("Building web site with:", buildcmd)
env = os.environ.copy()
@@ -179,7 +190,7 @@
print("- Doing fresh checkout of branch %s" % args.outputbranch)
subprocess.run((GIT, 'checkout', args.outputbranch, '-f'), check=True)
subprocess.run((GIT, 'pull'), check=True)
- except:
+ except Exception: # TODO: narrow further to expected Exceptions
print("- Branch %s does not exist (yet), creating it..." % args.outputbranch)
# If .asf.yaml exists, which it should, make a copy of it in memory for later
asfyml = os.path.join(sourcepath, '.asf.yaml')
@@ -203,7 +214,7 @@
subprocess.run((GIT, 'add', 'output/'), check=True)
# Check if there are any changes.
- cp = subprocess.run((GIT, 'diff', '--cached', '--quiet'))
+ cp = subprocess.run((GIT, 'diff', '--cached', '--quiet'), check = False) # checked below
if cp.returncode == 0:
# There were no differences reported.
print('Nothing new to commit. Ignoring this build.')
@@ -215,6 +226,8 @@
if IS_PRODUCTION:
print('- Pushing changes, for publishing')
subprocess.run((GIT, 'push', args.source, args.outputbranch), check=True)
+ else:
+ print('- NOT ON PRODUCTION. NOT PUSHED.')
print('Success. Done.')
# for dev/test provide viewing instructions
@@ -261,10 +274,14 @@
else:
pel_options = ''
+ # --debug means exception traces are shown
+ # TODO: would like to be able to set this from the yaml settings file
+ dbg = '--debug' if args.debug else ''
+ delout = '--delete-output-directory' if args.delete else ''
+
# Call pelican
buildcmd = (BASH, '-c',
- ### note: adding --debug can be handy
- f'(pelican {content_dir} --settings {settings_path} --o {args.output} {pel_options})',
+ f'(pelican {content_dir} --settings {settings_path} --o {args.output} {pel_options} {dbg} {delout})',
)
print("Building web site with:", buildcmd)
env = os.environ.copy()
@@ -276,7 +293,15 @@
pass
-def generate_settings(source_yaml, settings_path, builtin_p_paths=[], sourcepath='.'):
+def generate_settings(source_yaml, settings_path, builtin_p_paths=None, sourcepath='.'):
+ """Generate the Pelican settings file
+
+ :param source_yaml: the settings in YAML form
+ :param settings_path: the path name to generate
+ :param builtin_p_paths: list of plugin paths (defaults to [])
+ :param sourcepath: path to source (defaults to '.')
+
+ """
ydata = yaml.safe_load(open(source_yaml))
tdata = ydata['site'] # Easy to copy these simple values.
@@ -290,6 +315,8 @@
tdata['pages'] = content.get('pages')
tdata['static'] = content.get('static_dirs', [ '.', ])
+ if builtin_p_paths is None:
+ builtin_p_paths = []
tdata['p_paths'] = builtin_p_paths
tdata['use'] = ['gfm']
@@ -347,6 +374,7 @@
tdata['uses_data'] = None
tdata['uses_run'] = None
+ tdata['uses_postrun'] = None
tdata['uses_ignore'] = None
tdata['uses_copy'] = None
if 'setup' in ydata:
@@ -357,11 +385,17 @@
tdata['uses_data'] = 'yes' # ezt.boolean()
tdata['asfdata'] = sdata['data']
tdata['use'].append('asfdata') # add the plugin
- # Run the included scripts with the asfrun plugin.
+ # Run the included scripts with the asfrun plugin during initialize
if 'run' in sdata:
tdata['uses_run'] = 'yes' # ezt.boolean
tdata['run'] = sdata['run']
tdata['use'].append('asfrun') # add the plugin
+ # Run the included scripts with the asfrun plugin during finalize
+ if 'postrun' in sdata:
+ tdata['uses_postrun'] = 'yes' # ezt.boolean
+ tdata['postrun'] = sdata['postrun']
+ if not 'run' in sdata:
+ tdata['use'].append('asfrun') # add the plugin (if not already added)
# Ignore files avoids copying these files to output.
if 'ignore' in sdata:
tdata['uses_ignore'] = 'yes' # ezt.boolean
@@ -429,13 +463,18 @@
parser_git.add_argument("--outputbranch", help = "Web site repository branch to commit output to (default: %(default)s)", default = "asf-site")
parser_git.add_argument("--count", help = "Minimum number of html pages (default: %(default)s)", type = int, default = 0)
parser_git.add_argument("--listen", help = "Start pelican -l after build (default: %(default)s)", action = "store_true")
+ parser_git.add_argument("--debug", help = "Run pelican with debug flag (show full exception traces)", action = "store_true")
+ parser_git.add_argument("--delete", help = "Delete output directory first", action = "store_true")
+ parser_git.add_argument("--plugins", help = "Directory for global plugins")
parser_git.set_defaults(func=locked_build)
parser_dir = subparsers.add_parser("dir", help = "Build source in current directory and optionally serve the result")
parser_dir.add_argument("--output", help = "Pelican output path (default: %(default)s)", default = "site-generated")
parser_dir.add_argument("--listen", help = "Pelican build in server mode (default: %(default)s)", action = "store_true")
parser_dir.add_argument('--yaml-dir', help='Where pelicanconf.yaml is located (default: %(default)s)', default='.')
- parser_dir.add_argument('--content-dir', help='Where is the content located (default: %{default)s)', default='content')
+ parser_dir.add_argument('--content-dir', help='Where is the content located (default: %(default)s)', default='content')
+ parser_dir.add_argument("--debug", help = "Run pelican with debug flag (show full exception traces)", action = "store_true")
+ parser_dir.add_argument("--delete", help = "Delete output directory first", action = "store_true")
parser_dir.set_defaults(func=build_dir)
args = parser.parse_args()
diff --git a/bin/local-pelican-site.sh b/bin/local-pelican-site.sh
index a47a1d2..1a97540 100755
--- a/bin/local-pelican-site.sh
+++ b/bin/local-pelican-site.sh
@@ -4,7 +4,7 @@
# and deploy it at http://localhost:8000
#
# requires pip3/python3, cmake, and a C compiler
-# known to work on linux/osx. probably works under WSL.
+# known to work on linux/osx. probably works under WSL.
# will not work under basic Windows.
# github prefix for cloning/updating repos
@@ -48,13 +48,13 @@
echo "pipenv not found! installing it for you..."
pip3 install pipenv > /dev/null 2>&1
if [ $? -eq 1 ];
- then
- echo "pipenv installation failed!"
+ then
+ echo "pipenv installation failed!"
exit -1
fi
fi
-# create our build dir to hold our repos and cmark-gfm
+# create our build dir to hold our repos and cmark-gfm
if [ ! -d $SB ];
then
mkdir $SB || 'echo "Creation of $SB failed!" && exit -1'
@@ -70,11 +70,11 @@
if [ -d $IP ];
then
echo "$IP exists - updating..."
- cd $IP && git pull > /dev/null && cd ..
+ cd $IP && git pull > /dev/null && cd ..
else
echo "Cloning $IP"
# Sometimes useful to add -b <branch> for buildsite testing
- git clone $GH/$IP 2>&1
+ git clone $GH/$IP 2>&1
fi
IP="$SB/$IP"
diff --git a/bin/pelican.auto.ezt b/bin/pelican.auto.ezt
index 9a40475..7b8bbd8 100644
--- a/bin/pelican.auto.ezt
+++ b/bin/pelican.auto.ezt
@@ -36,6 +36,7 @@
PAGE_TRANSLATION_ID = None
# Disable unused Pelican features
+# N.B. These features are currently unsupported, see https://github.com/apache/infrastructure-pelican/issues/49
FEED_ALL_ATOM = None
INDEX_SAVE_AS = ''
TAGS_SAVE_AS = ''
@@ -105,9 +106,13 @@
}
[end]
[if-any uses_run]
-# Configure the asfrun plugin
+# Configure the asfrun plugin (initialization)
ASF_RUN = [ [for run]'[run]', [end] ]
[end]
+[if-any uses_postrun]
+# Configure the asfrun plugin (finalization)
+ASF_POSTRUN = [ [for postrun]'[postrun]', [end] ]
+[end]
[if-any uses_ignore]
# Configure ignore files
IGNORE_FILES = [ [for ignore]'[ignore]', [end] ]
diff --git a/devtest/compare.sh b/devtest/compare.sh
index 5f33662..41b61f4 100755
--- a/devtest/compare.sh
+++ b/devtest/compare.sh
@@ -5,29 +5,30 @@
# ./compare.sh gora ~/src/asf/gora-site main
#
-invoked=`dirname $0`
-absdir=`cd $invoked ; pwd`
-bindir=`dirname $absdir`/bin
-#echo $bindir
-
-project=$1
-source=$2 # URL or pathname to a local clone
-branch=$3
-
set -x
+invoked="`dirname $0`"
+absdir="`realpath $invoked`"
+bindir="`dirname $absdir`/bin"
+#echo "$bindir"
+
+project="$1"
+source="$2" # URL or pathname to a local clone
+branch="$3"
+
+### maybe skip if already defined in ENV?
export LIBCMARKDIR=/tmp/cm/cmark-gfm-0.28.3.gfm.12/lib
-cd $source
+cd "$source"
# Get the branch where pelicanconf.* lives
-git checkout $branch
+git checkout "$branch"
# Now build the site into /tmp/$project/
-$bindir/buildsite.py --project $project --source $source --sourcebranch $branch
+"$bindir/buildsite.py" git --project "$project" --source "$source" --sourcebranch "$branch" --plugins "$absdir"/plugins || exit 1
# Switch to what the current/prior website looks like
git checkout asf-site
# Give a quick summary of old/new site
-diff -rq output /tmp/$project/build/output
+diff -rq "$source/output" "/tmp/$project/source/output"
diff --git a/devtest/setup-plugins.sh b/devtest/setup-plugins.sh
new file mode 100755
index 0000000..8607ee0
--- /dev/null
+++ b/devtest/setup-plugins.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Copy toc.py and pelican-gfm from the tree given as $1 into ./plugins
+set -x
+
+p6="${1:?usage: $0 <dir containing modules/pelican_asf/files>}"
+# -p: do not fail when plugins/ is left over from an earlier run
+mkdir -p plugins
+cp "$p6/modules/pelican_asf/files/toc.py" plugins/
+cp -r "$p6/modules/pelican_asf/files/pelican-gfm" plugins/
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..a7c8178
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,5 @@
+[mypy-ezt.*]
+ignore_missing_imports = True
+
+[mypy-pelican.*]
+ignore_missing_imports = True
\ No newline at end of file
diff --git a/plugins/PLUGIN_ARCH.md b/plugins/PLUGIN_ARCH.md
index 77746b4..aef7731 100644
--- a/plugins/PLUGIN_ARCH.md
+++ b/plugins/PLUGIN_ARCH.md
@@ -10,7 +10,7 @@
```python
PLUGIN_PATHS = ['./theme/plugins']
- PLUGINS = ['asfgenid', 'asfshell', 'asfdata', 'gfm', 'asfreader', 'asfcopy']
+ PLUGINS = ['asfgenid', 'asfrun', 'asfdata', 'gfm', 'asfreader', 'asfcopy']
```
2. Init (initialized). At this point any ASF_DATA is read into a metadata dictionary made available in every page.
@@ -57,7 +57,7 @@
}
```
- - The [asfshell plugin](./asfshell.py) runs shell scripts
+ - The [asfrun plugin](./asfrun.py) runs shell scripts during initialization
```yaml
setup:
@@ -71,6 +71,20 @@
]
```
+ - The [asfrun plugin](./asfrun.py) also runs shell scripts during finalization
+
+ ```yaml
+ setup:
+ postrun:
+ - /bin/bash postshell.sh
+ ```
+
+ ```python
+ ASF_POSTRUN = [
+ '/bin/bash postshell.sh'
+ ]
+ ```
+
3. Readers (readers_init). Two important readers are set at this point. Readers are responsible for transforming page files to html and
providing a metadata dictionary
- GFMReader by the gfm plugin. Transforms GitHub Flavored Markdown(GFM) to HTML.
diff --git a/plugins/asfdata.py b/plugins/asfdata.py
index 9ef530a..1c98f6f 100644
--- a/plugins/asfdata.py
+++ b/plugins/asfdata.py
@@ -48,6 +48,8 @@
(re.compile(r'>'), '>'),
]
+REQUESTS_TIMEOUT = 5 # timeout for requests calls
+
# Format of svn ls -v output: Jan 1 1970
SVN_DATE_FORMAT = "%b %d %Y"
@@ -78,7 +80,8 @@
# load data source from a url.
def url_data(url, debug):
- return load_data( url, requests.get(url).text, debug)
+ print("url_data",url, debug)
+ return load_data( url, requests.get(url, timeout=REQUESTS_TIMEOUT).text, debug)
# load data source from a file.
@@ -122,7 +125,7 @@
reference[refs]['letter'] = letter
-# rotate a roster list singleton into an name and availid
+# convert a list singleton into a name and availid (e.g. chair and roster for officer positions)
def asfid_part(reference, part):
for refs in reference:
fix = reference[refs][part]
@@ -141,7 +144,7 @@
# the logo pattern includes a place to insert the project/podling key
logo = (parts[0].format(item.key_id))
# HEAD request
- response = requests.head('https://www.apache.org/' + logo)
+ response = requests.head('https://www.apache.org/' + logo, timeout=REQUESTS_TIMEOUT)
if response.status_code != 200:
# logo not found - use the default logo
logo = parts[1]
@@ -342,9 +345,9 @@
if save_metadata:
metadata[seq] = reference
try:
- metadata[f'{seq}_size'] = len(reference)
+ metadata[f'{seq}_size'] = len(reference)
except TypeError: # allow for integer
- pass
+ pass
# create metadata sequences and dictionaries from a data load
@@ -517,9 +520,9 @@
def process_blog(feed, count, words, debug):
if debug:
print(f'blog feed: {feed}')
- content = requests.get(feed).text
# See INFRA-23636: cannot check the page status, so just catch parsing errors
try:
+ content = requests.get(feed, timeout=REQUESTS_TIMEOUT).text
dom = xml.dom.minidom.parseString(content)
# dive into the dom to get 'entry' elements
entries = dom.getElementsByTagName('entry')
@@ -527,6 +530,8 @@
entries = entries[:count]
except xml.parsers.expat.ExpatError:
entries = []
+ except requests.exceptions.ConnectionError:
+ entries = []
v = [ ]
for entry in entries:
if debug:
@@ -569,7 +574,7 @@
# retrieve from twitter
def connect_to_endpoint(url, headers):
- response = requests.request('GET', url, headers=headers)
+ response = requests.request('GET', url, headers=headers, timeout=REQUESTS_TIMEOUT)
if response.status_code != 200:
raise Exception(response.status_code, response.text)
return response.json()
@@ -590,7 +595,11 @@
tweet_fields = 'tweet.fields=author_id'
url = f'https://api.twitter.com/2/tweets/search/recent?query={query}&{tweet_fields}'
headers = {'Authorization': f'Bearer {bearer_token}'}
- load = connect_to_endpoint(url, headers)
+ try:
+ load = connect_to_endpoint(url, headers)
+ except Exception as e:
+ print(f'ERROR: Cannot connect to Twitter for {handle}: {e}')
+ return sequence_list('twitter',[{ 'text': 'Cannot connect to Twitter at present' }])
result_count = load['meta']['result_count']
if result_count == 0:
print(f'WARN: No recent tweets for {handle}')
@@ -614,7 +623,7 @@
if debug:
print('-----\nECCN:', fname)
if fname.startswith("https://"):
- j = yaml.safe_load(requests.get(fname).text)
+ j = yaml.safe_load(requests.get(fname, timeout=REQUESTS_TIMEOUT).text)
else:
j = yaml.safe_load(open(fname))
@@ -785,7 +794,8 @@
for key in metadata:
if debug:
print(f'metadata[{key}] =')
- print(metadata[key])
+ pp = pprint.PrettyPrinter(indent=2)
+ pp.pprint(metadata[key])
print('-----')
elif isinstance(metadata[key], str):
print(f'metadata[{key}] = "{metadata[key]}"')
diff --git a/plugins/asfgenid.py b/plugins/asfgenid.py
index 7ce7108..e2188b6 100644
--- a/plugins/asfgenid.py
+++ b/plugins/asfgenid.py
@@ -124,12 +124,12 @@
# assure configuration
-def init_default_config(pelican):
+def init_default_config(pel_obj):
from pelican.settings import DEFAULT_CONFIG
DEFAULT_CONFIG.setdefault('ASF_GENID', ASF_GENID)
- if(pelican):
- pelican.settings.setdefault('ASF_GENID', ASF_GENID)
+ if(pel_obj):
+ pel_obj.settings.setdefault('ASF_GENID', ASF_GENID)
# from Apache CMS markdown/extensions/headerid.py - slugify in the same way as the Apache CMS
@@ -164,7 +164,7 @@
# fixup cmark content - note that this may be too hungry. It may need to occur later and skipped in codeblock and pre tags.
def fixup_content(content):
- text = content._content
+ text = content._content # pylint: disable=protected-access
modified = False
# Find messed up html
for regex, replace in FIXUP_UNSAFE:
@@ -173,7 +173,7 @@
modified = True
text = re.sub(regex, replace, text)
if modified:
- content._content = text
+ content._content = text # pylint: disable=protected-access
# expand metadata found in {{ key }}
@@ -283,7 +283,7 @@
parts = rel_source_path.split('/')
url = '/'
crumbs = []
- crumbs.append(f'<a href="/">Home</a> » ')
+ crumbs.append('<a href="/">Home</a> » ')
# don't process the filename part
last = len(parts)-1
for i in range(last):
@@ -292,7 +292,7 @@
crumbs.append(f'<a href="{url}">{p}</a> » ')
crumbs.append(f'<a href="#">{title}</a>')
return ''.join(crumbs)
-
+
# add the asfdata metadata into GFM content.
def add_data(content):
@@ -332,7 +332,7 @@
# step 2 - prepare for genid processes
# parse html content into BeautifulSoup4
- soup = BeautifulSoup(content._content, 'html.parser')
+ soup = BeautifulSoup(content._content, 'html.parser') # pylint: disable=protected-access
# page title
title = content.metadata.get('title', 'Title')
# assure relative source path is in the metadata
@@ -389,7 +389,7 @@
generate_toc(content, tags, title, asf_genid['toc_headers'], asf_genid['debug'])
# step 9 - reset the html content
- content._content = soup.decode(formatter='html')
+ content._content = soup.decode(formatter='html') # pylint: disable=protected-access
# step 10 - output all of the permalinks created
if asf_genid['debug']:
@@ -397,13 +397,13 @@
print(f' #{tag}')
-def tb_connect(pel_ob):
+def tb_connect(content_obj):
"""Print any exception, before Pelican chews it into nothingness."""
try:
- generate_id(pel_ob)
+ generate_id(content_obj)
except Exception:
print('-----', file=sys.stderr)
- print('FATAL: %s' % (pel_ob.relative_source_path), file=sys.stderr)
+ print('FATAL: %s' % (content_obj.relative_source_path), file=sys.stderr)
traceback.print_exc()
# if we have errors in this module then we want to quit to avoid erasing the site
sys.exit(4)
@@ -411,6 +411,4 @@
def register():
pelican.plugins.signals.initialized.connect(init_default_config)
-
-
-pelican.plugins.signals.content_object_init.connect(tb_connect)
+ pelican.plugins.signals.content_object_init.connect(tb_connect)
diff --git a/plugins/asfindex.py b/plugins/asfindex.py
index 2d32873..2826357 100644
--- a/plugins/asfindex.py
+++ b/plugins/asfindex.py
@@ -21,19 +21,13 @@
#
import sys
-import subprocess
-import shlex
-import io
import os
import os.path
import traceback
import pelican.plugins.signals
import pelican.settings
-from pelican.contents import Article, Page, Static
-from pelican.generators import (ArticlesGenerator, # noqa: I100
- PagesGenerator, SourceFileGenerator,
- StaticGenerator, TemplatePagesGenerator)
+from pelican.generators import PagesGenerator
# get setting
@@ -65,13 +59,13 @@
for g in generators:
if isinstance(g, PagesGenerator):
for p in g.pages:
- # use an absolute path
+ # use an absolute path
save_as = '/' + p.save_as
if save_as.endswith('/index.html'):
# use "/" for the filename of index.html files assuring that they are first in a folder's list
save_as = save_as[:-10]
# extract the path name
- path, page = os.path.split(save_as)
+ path, _page = os.path.split(save_as)
site_index.append((path, save_as, p.title))
site_index.sort()
return site_index
@@ -88,7 +82,7 @@
if scope != '**':
scoped = True
for p in site_index:
- path, page = os.path.split(p[0])
+ _path, page = os.path.split(p[0])
folder = page.capitalize()
if not scoped or (scoped and p[0].startswith(scope)):
if folder != current_folder:
diff --git a/plugins/asfreader.py b/plugins/asfreader.py
index 51dea47..9ac0c61 100644
--- a/plugins/asfreader.py
+++ b/plugins/asfreader.py
@@ -48,7 +48,7 @@
self.text = text
def read_other(self, relative):
- return ezt._FileReader(os.path.join(self.source_dir, relative))
+ return ezt._FileReader(os.path.join(self.source_dir, relative)) # pylint: disable=protected-access
def filename(self):
return self.fname
@@ -94,9 +94,9 @@
# prepare text as an ezt template
# compress_whitespace=0 is required as blank lines and indentation have meaning in markdown.
template = ezt.Template(compress_whitespace=0)
+ assert template
reader = ASFTemplateReader(source_path, text)
template.parse(reader, base_format=ezt.FORMAT_HTML)
- assert template
# generate content from ezt template with metadata
fp = io.StringIO()
template.generate(fp, metadata)
diff --git a/plugins/asfrun.py b/plugins/asfrun.py
index 42ae89d..1fab6d4 100644
--- a/plugins/asfrun.py
+++ b/plugins/asfrun.py
@@ -17,9 +17,10 @@
# under the License.
#
#
-# asfshell.py - Pelican plugin that runs shell scripts during initialization
+# asfrun.py - Pelican plugin that runs shell scripts during initialization or finalization
#
+import os
import sys
import subprocess
import shlex
@@ -30,20 +31,27 @@
# open a subprocess
-def os_run(args):
- return subprocess.Popen(args, stdout=subprocess.PIPE, universal_newlines=True)
-
+def os_run(args, env=None):
+ return subprocess.Popen(args, env=env, stdout=subprocess.PIPE, universal_newlines=True)
# run shell
-def run_script(pel_ob):
- asf_run = pel_ob.settings.get('ASF_RUN')
- if asf_run:
- print('-----\nasfshell')
- for command in asf_run:
+def run_script(pel_ob, command_source, env=False):
+ commands = pel_ob.settings.get(command_source)
+ if commands:
+ print(f'-----\nasfrun {command_source}')
+ if env:
+ # copy the pelican environment into the OS env
+ my_env = os.environ.copy()
+ for k, v in sorted(pel_ob.settings.items()):
+ if k != 'ASF_DATA': # rather large; not needed
+ my_env['PELICAN_'+k] = str(v)
+ else:
+ my_env = None
+ for command in commands:
print(f'-----\n{command}')
args = shlex.split(command)
print(args)
- with os_run(args) as s:
+ with os_run(args, my_env) as s:
for line in s.stdout:
line = line.strip()
print(f'{line}')
@@ -52,7 +60,17 @@
def tb_initialized(pel_ob):
""" Print any exception, before Pelican chews it into nothingness."""
try:
- run_script(pel_ob)
+ run_script(pel_ob, 'ASF_RUN')
+ except Exception:
+ print('-----', file=sys.stderr)
+ traceback.print_exc()
+ # exceptions here stop the build
+ raise
+
+def tb_finalized(pel_ob):
+ """ Print any exception, before Pelican chews it into nothingness."""
+ try:
+ run_script(pel_ob, 'ASF_POSTRUN', env=True)
except Exception:
print('-----', file=sys.stderr)
traceback.print_exc()
@@ -62,3 +80,4 @@
def register():
pelican.plugins.signals.initialized.connect(tb_initialized)
+ pelican.plugins.signals.finalized.connect(tb_finalized)
diff --git a/plugins/consensual_youtube.py b/plugins/consensual_youtube.py
new file mode 100644
index 0000000..3f0fd4b
--- /dev/null
+++ b/plugins/consensual_youtube.py
@@ -0,0 +1,149 @@
+#!/usr/bin/python -B
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+# consensual_youtube.py -- Pelican plugin that supports postponing loading
+# youtube resources until the user explicitly opts into playing the video
+#
+# Include a YouTube video with a 'youtube' tag and a youtube_id attribute:
+#
+# <youtube youtube_id="GU0SV_2tWkU"/>
+#
+# You can specify the 'id' to make it easier to apply additional CSS. This id
+# will also be carried over to the iframe when the div is replaced.
+#
+# The preview image is taken from `img/yt_preview_{youtube_id}.jpg` in your
+# content folder. If no preview image is found there, it is fetched from
+# YouTube at site generation time and written to the output folder.
+from os import path
+
+from urllib import request
+
+from pelican import contents, signals
+
+from bs4 import BeautifulSoup
+
+CSS_STYLE = '''
+ .yt-container {
+ background-size: cover;
+ background-position: center;
+ }
+
+ .yt-notice {
+ padding: 0.5em;
+ color: black;
+ background-color: white;
+ }
+ .yt-notice::after {
+ content: "Clicking to play this video establishes a connection with YouTube";
+ }
+'''
+
+JS_SCRIPT = '''
+ var cachedPromise;
+ function loadScript() {
+ cachedPromise = cachedPromise || new Promise((resolve, reject) => {
+ let script = document.createElement('script');
+ script.src = 'https://www.youtube.com/iframe_api';
+ script.addEventListener('load', resolve);
+ script.addEventListener('error', (e) => reject(e));
+ document.body.appendChild(script);
+ });
+ return cachedPromise;
+ }
+
+ function startPlayer(id, youtube_id) {
+ loadScript().then(() => {
+ window.YT.ready(function() {
+ let player = new YT.Player(id, {
+ videoId: youtube_id,
+ playerVars: {
+ 'playsinline': 1
+ },
+ events: {
+ 'onReady': (event) => {
+ event.target.playVideo();
+ }
+ }
+ });
+ });
+ });
+ }
+ function addElement(e) {
+ startPlayer(e.target.id, e.target.attributes['youtube_id'].value);
+ }
+ document
+ .querySelectorAll('.yt-container')
+ .forEach((video) => video.addEventListener('click', addElement))
+'''
+
+def generate_youtube(content):
+ if isinstance(content, contents.Static):
+ return
+ soup = BeautifulSoup(content._content, 'html.parser') # pylint: disable=protected-access
+ tags = soup.find_all('youtube')
+
+ if not tags:
+ return
+
+ style = soup.new_tag('style')
+ style.append(CSS_STYLE)
+ soup.append(style)
+
+ script = soup.new_tag('script')
+ script.append(JS_SCRIPT)
+ soup.append(script)
+
+ for tag in tags:
+ replace_tag(content.settings['PATH'], content.settings['OUTPUT_PATH'], soup, tag)
+
+ content._content = soup.decode(formatter='html') # pylint: disable=protected-access
+
+def replace_tag(input_path, output_path, soup, tag):
+ tag.name = 'div'
+
+ if not tag.has_attr('youtube_id'):
+ raise ValueError('Attribute "youtube_id" is mandatory for "youtube" tags')
+
+ yt_id = tag['youtube_id']
+
+ # If a preview file is present in the input content directory,
+ # use that and rely on Pelican to copy it to the output. If not,
+ # fetch it from YouTube at site generation time and place it
+ # straight into the output directory:
+ preview = f'/img/yt_preview_{yt_id}.jpg'
+ if not path.isfile(input_path + preview):
+ request.urlretrieve(f'https://img.youtube.com/vi/{yt_id}/0.jpg',
+ output_path + preview)
+
+ # Default YouTube player size is 360p:
+ player_width = 640
+ player_height = 360
+
+ if not tag.has_attr('id'):
+ tag['id'] = f'yt-container-{yt_id}'
+
+ tag['class'] = 'yt-container'
+ tag['style'] = f"background-image: url('{preview}'); width: {player_width}px; height: {player_height}px;"
+
+ warning = soup.new_tag('div')
+ warning['class'] = "yt-notice"
+ tag.append(warning)
+
+def register():
+ signals.content_object_init.connect(generate_youtube)
diff --git a/plugins/gfm.py b/plugins/gfm.py
index 981bd2b..63e89f1 100644
--- a/plugins/gfm.py
+++ b/plugins/gfm.py
@@ -72,6 +72,7 @@
# Extract the metadata from the header of the text
lines = text.splitlines()
+ i = 0 # See https://github.com/apache/infrastructure-pelican/issues/70
for i in range(len(lines)):
line = lines[i]
match = GFMReader.RE_METADATA.match(line)
@@ -108,15 +109,15 @@
# read metadata and markdown content
text, metadata = self.read_source(source_path)
- assert text
- assert metadata
+ assert text, 'Text must not be empty'
+ assert metadata, 'Metadata must not be empty'
# Render the markdown into HTML
if sys.version_info >= (3, 0):
text = text.encode('utf-8')
content = self.render(text).decode('utf-8')
else:
content = self.render(text)
- assert content
+ assert content, 'Did not expect content to be empty'
return content, metadata
@@ -127,7 +128,6 @@
).encode("utf-8")
-
def add_readers(readers):
readers.reader_classes['md'] = GFMReader
diff --git a/plugins/md_inline_extension/inline.py b/plugins/md_inline_extension/inline.py
index 0d73b0b..049cbbf 100644
--- a/plugins/md_inline_extension/inline.py
+++ b/plugins/md_inline_extension/inline.py
@@ -27,7 +27,7 @@
# Get the user specified settings
try:
settings = pelicanobj.settings['MD_INLINE']
- except:
+ except Exception: # TODO: narrow further to expected Exceptions
settings = None
# If settings have been specified, add them to the config
@@ -45,7 +45,7 @@
pelicanobj.settings['MD_EXTENSIONS'].append(PelicanInlineMarkdownExtension(config))
else:
pelicanobj.settings['MARKDOWN'].setdefault('extensions', []).append(PelicanInlineMarkdownExtension(config))
- except:
+ except Exception: # TODO: narrow further to expected Exceptions
sys.excepthook(*sys.exc_info())
sys.stderr.write("\nError - the pelican Markdown extension failed to configure. Inline Markdown extension is non-functional.\n")
sys.stderr.flush()
diff --git a/plugins/spu.py b/plugins/spu.py
index 248406c..61cdf5d 100644
--- a/plugins/spu.py
+++ b/plugins/spu.py
@@ -38,6 +38,7 @@
# List of subdomains deemed safe for spu:fetch()
SPU_FETCH_SAFE_DOMAINS = ("*.apache.org",)
+REQUESTS_TIMEOUT = 5 # timeout for requests calls
def spu_cmd_fetch(args: list):
"""Fetches an external URL and put the content where the call was made"""
@@ -46,7 +47,7 @@
is_safe = any(fnmatch.fnmatch(url_parsed.netloc, pattern) for pattern in SPU_FETCH_SAFE_DOMAINS)
if is_safe:
print("Fetching external resource " + url)
- return requests.get(url).text
+ return requests.get(url, timeout=REQUESTS_TIMEOUT).text
else:
print("Not fetching unsafe external resource " + url)
return ""
@@ -63,11 +64,11 @@
def spu_parse(instance: pelican.contents.Page):
- if instance._content is not None:
- instance._content = re.sub(
+ if instance._content is not None: # pylint: disable=protected-access
+ instance._content = re.sub( # pylint: disable=protected-access
r"<code>\s*spu:([_a-z]+)\(((?:(['\"]?)(.*?)\3(?:,\s*)?)*)\s*?\)\s*<\/code>",
spu_sub,
- instance._content,
+ instance._content, # pylint: disable=protected-access
flags=re.UNICODE,
)
diff --git a/plugins/toc.py b/plugins/toc.py
index 22c18ad..95084c6 100644
--- a/plugins/toc.py
+++ b/plugins/toc.py
@@ -26,28 +26,28 @@
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
-def unique(id, ids):
+def unique(id_, ids):
""" Ensure id is unique in set of ids. Append '_1', '_2'... if not """
- while id in ids or not id:
- m = IDCOUNT_RE.match(id)
+ while id_ in ids or not id_:
+ m = IDCOUNT_RE.match(id_)
if m:
- id = '%s_%d' % (m.group(1), int(m.group(2)) + 1)
+ id_ = '%s_%d' % (m.group(1), int(m.group(2)) + 1)
else:
- id = '%s_%d' % (id, 1)
- ids.add(id)
- return id
+ id_ = '%s_%d' % (id_, 1)
+ ids.add(id_)
+ return id_
'''
end
'''
class HtmlTreeNode(object):
- def __init__(self, parent, header, level, id):
+ def __init__(self, parent, header, level, id_):
self.children = []
self.parent = parent
self.header = header
self.level = level
- self.id = id
+ self.id = id_
def add(self, new_header, ids):
new_level = new_header.name
@@ -95,7 +95,7 @@
return ret
-def init_default_config(pelican):
+def init_default_config(pel_ob):
from pelican.settings import DEFAULT_CONFIG
TOC_DEFAULT = {
@@ -104,8 +104,8 @@
}
DEFAULT_CONFIG.setdefault('TOC', TOC_DEFAULT)
- if(pelican):
- pelican.settings.setdefault('TOC', TOC_DEFAULT)
+ if(pel_ob):
+ pel_ob.settings.setdefault('TOC', TOC_DEFAULT)
def generate_toc(content):
@@ -115,14 +115,14 @@
all_ids = set()
title = content.metadata.get('title', 'Title')
tree = node = HtmlTreeNode(None, title, 'h0', '')
- soup = BeautifulSoup(content._content, 'html.parser')
+ soup = BeautifulSoup(content._content, 'html.parser') # pylint: disable=protected-access
settoc = False
try:
header_re = re.compile(content.metadata.get(
'toc_headers', content.settings['TOC']['TOC_HEADERS']))
except re.error as e:
- logger.error("TOC_HEADERS '%s' is not a valid re\n%s",
+ logger.error("TOC_HEADERS '%s' is not a valid re\n",
content.settings['TOC']['TOC_HEADERS'])
raise e
@@ -133,7 +133,7 @@
settoc = True
node, new_header = node.add(header, all_ids)
header.replaceWith(new_header) # to get our ids back into soup
-
+
if settoc:
print("Generating ToC for %s" % content.slug)
tree_string = '{}'.format(tree)
@@ -142,10 +142,10 @@
itoc = soup.find('p', text = '[TOC]')
if itoc:
itoc.replaceWith(tree_soup)
-
- content._content = soup.decode(formatter='html')
+
+ content._content = soup.decode(formatter='html') # pylint: disable=protected-access
def register():
signals.initialized.connect(init_default_config)
-signals.content_object_init.connect(generate_toc)
+ signals.content_object_init.connect(generate_toc)
diff --git a/pylintrc b/pylintrc
new file mode 100644
index 0000000..c9ad247
--- /dev/null
+++ b/pylintrc
@@ -0,0 +1,578 @@
+[MASTER]
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loaded into the active Python interpreter and may
+# run arbitrary code.
+extension-pkg-allow-list=fcntl,unicodedata
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loaded into the active Python interpreter and may
+# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
+# for backward compatibility.)
+extension-pkg-whitelist=
+
+# Return non-zero exit code if any of these messages/categories are detected,
+# even if score is above --fail-under value. Syntax same as enable. Messages
+# specified are enabled, while categories only check already-enabled messages.
+fail-on=
+
+# Specify a score threshold to be exceeded before program exits with error.
+fail-under=10.0
+
+# Files or directories to be skipped. They should be base names, not paths.
+ignore=CVS
+
+# Add files or directories matching the regex patterns to the ignore-list. The
+# regex matches against paths and can be in Posix or Windows format.
+ignore-paths=
+
+# Files or directories matching the regex patterns are skipped. The regex
+# matches against base names, not paths.
+ignore-patterns=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
+# number of processors available to use.
+jobs=1
+
+# Control the amount of potential inferred values when inferring a single
+# object. This can help the performance when dealing with large functions or
+# complex, nested conditions.
+limit-inference-results=100
+
+# List of plugins (as comma separated values of python module names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Minimum Python version to use for version dependent checks. Will default to
+# the version used to run pylint.
+py-version=3.10
+
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode=yes
+
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
+confidence=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=raw-checker-failed,
+ bad-inline-option,
+ locally-disabled,
+ file-ignored,
+ suppressed-message,
+ useless-suppression,
+ deprecated-pragma,
+ use-symbolic-message-instead,
+ attribute-defined-outside-init,
+ logging-fstring-interpolation, # Not essential
+ C,R,
+ R0903,R0913,R0912,R0914,R0915,
+ R1706, # ternary
+ W1406, # redundant u-string prefix
+ W0105, # String statement has no effect (pointless-string-statement)
+ W0511, # TODO etc
+ W0707, # Consider explicitly re-raising using the 'from' keyword (raise-missing-from)
+ W1514, # Using open without explicitly specifying an encoding (unspecified-encoding)
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifiers separated by comma (,) or put this option
+# multiple times (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+enable=c-extension-no-member
+
+
+[REPORTS]
+
+# Python expression which should return a score less than or equal to 10. You
+# have access to the variables 'error', 'warning', 'refactor', and 'convention'
+# which contain the number of messages in each category, as well as 'statement'
+# which is the total number of statements analyzed. This score is used by the
+# global evaluation report (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details.
+#msg-template=
+
+# Set the output format. Available formats are text, parseable, colorized, json
+# and msvs (visual studio). You can also give a reporter class, e.g.
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+
+# Tells whether to display a full report or only the messages.
+reports=no
+
+# Activate the evaluation score.
+score=yes
+
+
+[REFACTORING]
+
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=5
+
+# Complete names of functions that never return. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit,argparse.parse_error
+
+
+[LOGGING]
+
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=old
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules=logging
+
+
+[SPELLING]
+
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=4
+
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the 'python-enchant' package.
+spelling-dict=
+
+# List of comma separated words that should be considered directives if they
+# appear at the beginning of a comment and should not be checked.
+spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# A path to a file that contains the private dictionary; one word per line.
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to the private dictionary (see the
+# --spelling-private-dict-file option) instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+# notes=FIXME,
+# XXX,
+# TODO
+
+# Regular expression of note tags to take in consideration.
+#notes-rgx=
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# class is considered mixin if its name matches the mixin-class-rgx option.
+ignore-mixin-members=yes
+
+# Tells whether to warn about missing members when the owner of the attribute
+# is inferred to be None.
+ignore-none=yes
+
+# This flag controls whether pylint should warn about no-member and similar
+# checks whenever an opaque object is returned when inferring. The inference
+# can return multiple potential results while evaluating a Python object, but
+# some branches might not be evaluated, which results in partial inference. In
+# that case, it might be useful to still emit no-member and other checks for
+# the rest of the inferred objects.
+ignore-on-opaque-inference=yes
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local,xml.parsers.expat
+
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis). It
+# supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+
+# Show a hint with possible names when a member name was not found. The aspect
+# of finding the hint is based on edit distance.
+missing-member-hint=yes
+
+# The minimum edit distance a name should have in order to be considered a
+# similar match for a missing member name.
+missing-member-hint-distance=1
+
+# The total number of similar names that should be taken in consideration when
+# showing a hint for a missing member.
+missing-member-max-choices=1
+
+# Regex pattern to define which classes are considered mixins if ignore-mixin-
+# members is set to 'yes'
+mixin-class-rgx=.*[Mm]ixin
+
+# List of decorators that change the signature of a decorated function.
+signature-mutators=
+
+
+[VARIABLES]
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+
+# Tells whether unused global variables should be treated as a violation.
+allow-global-unused-variables=yes
+
+# List of names allowed to shadow builtins
+allowed-redefined-builtins=
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,
+ _cb
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore.
+ignored-argument-names=_.*|^ignored_|^unused_
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
+
+
+[FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string=' '
+
+# Maximum number of characters on a single line.
+max-line-length=100
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+
+[SIMILARITIES]
+
+# Comments are removed from the similarity computation
+ignore-comments=yes
+
+# Docstrings are removed from the similarity computation
+ignore-docstrings=yes
+
+# Imports are removed from the similarity computation
+ignore-imports=no
+
+# Signatures are removed from the similarity computation
+ignore-signatures=no
+
+# Minimum number of lines of a similarity.
+min-similarity-lines=4
+
+
+[BASIC]
+
+# Naming style matching correct argument names.
+argument-naming-style=snake_case
+
+# Regular expression matching correct argument names. Overrides argument-
+# naming-style.
+#argument-rgx=
+
+# Naming style matching correct attribute names.
+attr-naming-style=snake_case
+
+# Regular expression matching correct attribute names. Overrides attr-naming-
+# style.
+#attr-rgx=
+
+# Bad variable names which should always be refused, separated by a comma.
+bad-names=foo,
+ bar,
+ baz,
+ toto,
+ tutu,
+ tata
+
+# Bad variable names regexes, separated by a comma. If names match any regex,
+# they will always be refused
+bad-names-rgxs=
+
+# Naming style matching correct class attribute names.
+class-attribute-naming-style=any
+
+# Regular expression matching correct class attribute names. Overrides class-
+# attribute-naming-style.
+#class-attribute-rgx=
+
+# Naming style matching correct class constant names.
+class-const-naming-style=UPPER_CASE
+
+# Regular expression matching correct class constant names. Overrides class-
+# const-naming-style.
+#class-const-rgx=
+
+# Naming style matching correct class names.
+class-naming-style=PascalCase
+
+# Regular expression matching correct class names. Overrides class-naming-
+# style.
+#class-rgx=
+
+# Naming style matching correct constant names.
+const-naming-style=UPPER_CASE
+
+# Regular expression matching correct constant names. Overrides const-naming-
+# style.
+#const-rgx=
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+
+# Naming style matching correct function names.
+function-naming-style=snake_case
+
+# Regular expression matching correct function names. Overrides function-
+# naming-style.
+#function-rgx=
+
+# Good variable names which should always be accepted, separated by a comma.
+good-names=i,
+ j,
+ k,
+ ex,
+ Run,
+ _
+
+# Good variable names regexes, separated by a comma. If names match any regex,
+# they will always be accepted
+good-names-rgxs=
+
+# Include a hint for the correct naming format with invalid-name.
+include-naming-hint=no
+
+# Naming style matching correct inline iteration names.
+inlinevar-naming-style=any
+
+# Regular expression matching correct inline iteration names. Overrides
+# inlinevar-naming-style.
+#inlinevar-rgx=
+
+# Naming style matching correct method names.
+method-naming-style=snake_case
+
+# Regular expression matching correct method names. Overrides method-naming-
+# style.
+#method-rgx=
+
+# Naming style matching correct module names.
+module-naming-style=snake_case
+
+# Regular expression matching correct module names. Overrides module-naming-
+# style.
+#module-rgx=
+
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=^_
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+# These decorators are taken in consideration only for invalid-name.
+property-classes=abc.abstractproperty
+
+# Naming style matching correct variable names.
+variable-naming-style=snake_case
+
+# Regular expression matching correct variable names. Overrides variable-
+# naming-style.
+#variable-rgx=
+
+
+[STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=no
+
+# This flag controls whether the implicit-str-concat should generate a warning
+# on implicit string concatenation in sequences defined over several lines.
+check-str-concat-over-line-jumps=no
+
+
+[IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+
+# Deprecated modules which should not be used, separated by a comma.
+deprecated-modules=
+
+# Output a graph (.gv or any supported image format) of external dependencies
+# to the given file (report RP0402 must not be disabled).
+ext-import-graph=
+
+# Output a graph (.gv or any supported image format) of all (i.e. internal and
+# external) dependencies to the given file (report RP0402 must not be
+# disabled).
+import-graph=
+
+# Output a graph (.gv or any supported image format) of internal dependencies
+# to the given file (report RP0402 must not be disabled).
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant
+
+# Couples of modules and preferred modules, separated by a comma.
+preferred-modules=
+
+
+[CLASSES]
+
+# Warn about protected attribute access inside special methods
+check-protected-access-in-special-methods=no
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+ __new__,
+ setUp,
+ __post_init__
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,
+ _fields,
+ _replace,
+ _source,
+ _make
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=cls
+
+
+[DESIGN]
+
+# List of regular expressions of class ancestor names to ignore when counting
+# public methods (see R0903)
+exclude-too-few-public-methods=
+
+# List of qualified class names to ignore when counting class parents (see
+# R0901)
+ignored-parents=
+
+# Maximum number of arguments for function / method.
+max-args=5
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branches for function / method body.
+max-branches=12
+
+# Maximum number of locals for function / method body.
+max-locals=15
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "BaseException, Exception".
+overgeneral-exceptions=builtins.BaseException,
+ builtins.Exception
diff --git a/requirements.txt b/requirements.txt
index f07feee..3613381 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-pelican
+pelican # TODO: should this include [markdown] ?
pelican-sitemap # pelican plugin offering
soupsieve # needed by BeautifulSoup4
BeautifulSoup4 # needed by several plugins
diff --git a/tests/issue62/README.md b/tests/issue62/README.md
new file mode 100644
index 0000000..676ef94
--- /dev/null
+++ b/tests/issue62/README.md
@@ -0,0 +1,7 @@
+This directory contains files to test issue62
+
+* compare_meta.sh - runs pelican with GFM and default markdown and compares output
+* content/one.md - test file with various meta-data entries
+* pelican.def.py - pelican config for the default (non-GFM) markdown
+* pelican.gfm.py - pelican config for GFM markdown plugin
+* themes/trivial/templates/base.html - template to show how the markdown source is handled
diff --git a/tests/issue62/compare_meta.sh b/tests/issue62/compare_meta.sh
new file mode 100755
index 0000000..08fdb52
--- /dev/null
+++ b/tests/issue62/compare_meta.sh
@@ -0,0 +1,5 @@
+echo Running GFM
+pelican content --settings pelican.gfm.py -d --o gfm
+echo Running default Markdown
+pelican content --settings pelican.def.py -d --o def
+diff -r gfm def
diff --git a/tests/issue62/content/one.md b/tests/issue62/content/one.md
new file mode 100644
index 0000000..3d3e507
--- /dev/null
+++ b/tests/issue62/content/one.md
@@ -0,0 +1,11 @@
+title: Test file one
+tags: abc;def,ghi
+author: Writer1
+xyz: 456
+date: 2023-01-10
+modified: 2022 10 01 11:23
+
+
+first
+
+second
diff --git a/tests/issue62/def/one.html b/tests/issue62/def/one.html
new file mode 100644
index 0000000..f092452
--- /dev/null
+++ b/tests/issue62/def/one.html
@@ -0,0 +1,12 @@
+metadata:
+ category misc <class 'pelican.urlwrappers.Category'>
+ path_no_ext one <class 'str'>
+ title Test file one <class 'str'>
+ tags [<Tag 'abc'>, <Tag 'def,ghi'>] <class 'list'>
+ author Writer1 <class 'pelican.urlwrappers.Author'>
+ xyz 456 <class 'str'>
+ date 2023-01-10 00:00:00 <class 'pelican.utils.SafeDatetime'>
+ modified 2022-10-01 11:23:00 <class 'pelican.utils.SafeDatetime'>
+
+content: <p>first</p>
+<p>second</p>
\ No newline at end of file
diff --git a/tests/issue62/pelican.def.py b/tests/issue62/pelican.def.py
new file mode 100644
index 0000000..708a680
--- /dev/null
+++ b/tests/issue62/pelican.def.py
@@ -0,0 +1,46 @@
+# Pelican settings for the default (non-GFM) markdown run. Basic information about the site.
+SITENAME = 'Apache Template'
+SITEDESC = 'Provides a template for projects wishing to use the Pelican ASF static content system'
+SITEDOMAIN = 'template.apache.org'
+SITEURL = 'https://template.apache.org'
+SITELOGO = 'https://template.apache.org/images/logo.png'
+SITEREPOSITORY = 'https://github.com/apache/template-site/blob/main/content/'
+CURRENTYEAR = 2023
+TRADEMARKS = 'Apache, the Apache feather logo, and "Project" are trademarks or registered trademarks'
+TIMEZONE = 'UTC'
+# Theme includes templates and possibly static files
+THEME = 'themes/trivial'
+# No plugins are listed for this run
+PLUGINS = [] # don't autoload any plugins
+# All content is located at '.' (aka content/ )
+PAGE_PATHS = [ '.' ]
+STATIC_PATHS = [ '.', ]
+# Where to place/link generated pages
+# Regex: the named group path_no_ext captures everything before the final '.' (one.md gives 'one')
+PATH_METADATA = '(?P<path_no_ext>.*)\\..*'
+# The captured path_no_ext value is substituted into the output file name
+PAGE_SAVE_AS = '{path_no_ext}.html'
+# Don't try to translate
+PAGE_TRANSLATION_ID = None
+# Disable unused Pelican features
+# N.B. These features are currently unsupported, see https://github.com/apache/infrastructure-pelican/issues/49
+FEED_ALL_ATOM = None
+INDEX_SAVE_AS = ''
+TAGS_SAVE_AS = ''
+CATEGORIES_SAVE_AS = ''
+AUTHORS_SAVE_AS = ''
+ARCHIVES_SAVE_AS = ''
+# Disable articles by pointing to a (should-be-absent) subdir
+ARTICLE_PATHS = [ 'blog' ]
+# needed to create blogs page
+ARTICLE_URL = 'blog/{slug}.html'
+ARTICLE_SAVE_AS = 'blog/{slug}.html'
+# Disable all processing of .html files
+READERS = { 'html': None, }
+
+
+
+
+
+
+
diff --git a/tests/issue62/pelican.gfm.py b/tests/issue62/pelican.gfm.py
new file mode 100644
index 0000000..fdca8af
--- /dev/null
+++ b/tests/issue62/pelican.gfm.py
@@ -0,0 +1,48 @@
+# Pelican settings for the run that loads the 'gfm' markdown plugin.
+# Basic information about the site.
+SITENAME = 'Apache Template'
+SITEDESC = 'Provides a template for projects wishing to use the Pelican ASF static content system'
+SITEDOMAIN = 'template.apache.org'
+SITEURL = 'https://template.apache.org'
+SITELOGO = 'https://template.apache.org/images/logo.png'
+SITEREPOSITORY = 'https://github.com/apache/template-site/blob/main/content/'
+CURRENTYEAR = 2023
+TRADEMARKS = 'Apache, the Apache feather logo, and "Project" are trademarks or registered trademarks'
+TIMEZONE = 'UTC'
+# Theme includes templates and possibly static files
+THEME = 'themes/trivial'
+# Specify location of plugins, and which to use
+PLUGIN_PATHS = [ '../../plugins', ]
+PLUGINS = [ 'gfm', ]
+# All content is located at '.' (aka content/ )
+PAGE_PATHS = [ '.' ]
+STATIC_PATHS = [ '.', ]
+# Where to place/link generated pages
+# Regex: the named group path_no_ext captures everything before the final '.' of the path
+PATH_METADATA = '(?P<path_no_ext>.*)\\..*'
+# The captured path_no_ext value is substituted into the output file name
+PAGE_SAVE_AS = '{path_no_ext}.html'
+# Don't try to translate
+PAGE_TRANSLATION_ID = None
+# Disable unused Pelican features
+# N.B. These features are currently unsupported, see https://github.com/apache/infrastructure-pelican/issues/49
+FEED_ALL_ATOM = None
+INDEX_SAVE_AS = ''
+TAGS_SAVE_AS = ''
+CATEGORIES_SAVE_AS = ''
+AUTHORS_SAVE_AS = ''
+ARCHIVES_SAVE_AS = ''
+# Disable articles by pointing to a (should-be-absent) subdir
+ARTICLE_PATHS = [ 'blog' ]
+# needed to create blogs page
+ARTICLE_URL = 'blog/{slug}.html'
+ARTICLE_SAVE_AS = 'blog/{slug}.html'
+# Disable all processing of .html files
+READERS = { 'html': None, }
+
+
+
+
+
+
+
diff --git a/tests/issue62/themes/trivial/templates/base.html b/tests/issue62/themes/trivial/templates/base.html
new file mode 100644
index 0000000..16c5fe6
--- /dev/null
+++ b/tests/issue62/themes/trivial/templates/base.html
@@ -0,0 +1,8 @@
+metadata:
+{% for name, value in page.metadata.items() %}
+ {% if name != 'slug' and name != 'reader' %}
+ {{ name }} {{ value }} {{ value.__class__}}
+ {% endif %}
+{% endfor %}
+
+content: {{ page.content.rstrip() }}
diff --git a/tests/issue82/.gitignore b/tests/issue82/.gitignore
new file mode 100644
index 0000000..2a94754
--- /dev/null
+++ b/tests/issue82/.gitignore
@@ -0,0 +1 @@
+out/*
\ No newline at end of file
diff --git a/tests/issue82/asfdataboth1.yaml b/tests/issue82/asfdataboth1.yaml
new file mode 100644
index 0000000..35f7e8d
--- /dev/null
+++ b/tests/issue82/asfdataboth1.yaml
@@ -0,0 +1,14 @@
+ci:
+ # load, transform, and create data sequences from committee info
+ url: https://whimsy.apache.org/public/committee-info.json
+ officers:
+ description: 'Foundation Officers sequence'
+ # select ci['officers'] for the sequence
+ path: officers
+ # convert ci['officers']['roster']
+ asfid: roster
+ ci:
+ # used on /foundation/
+ description: 'Dictionary of officers and committees'
+ # save a merged dictionary version of these sequences.
+ dictionary: officers
diff --git a/tests/issue82/asfdataboth2.yaml b/tests/issue82/asfdataboth2.yaml
new file mode 100644
index 0000000..3879b9b
--- /dev/null
+++ b/tests/issue82/asfdataboth2.yaml
@@ -0,0 +1,17 @@
+ci1:
+ # load, transform, and create data sequences from committee info
+ url: https://whimsy.apache.org/public/committee-info.json
+ officers:
+ description: 'Foundation Officers sequence'
+ # select ci['officers'] for the sequence
+ path: officers
+ # convert ci['officers']['roster']
+ asfid: roster
+ci2:
+ # load, transform, and create data sequences from committee info
+ url: https://whimsy.apache.org/public/committee-info.json
+ ci:
+ # used on /foundation/
+ description: 'Dictionary of officers and committees'
+ # save a merged dictionary version of these sequences.
+ dictionary: officers
diff --git a/tests/issue82/asfdataci.yaml b/tests/issue82/asfdataci.yaml
new file mode 100644
index 0000000..ab7212d
--- /dev/null
+++ b/tests/issue82/asfdataci.yaml
@@ -0,0 +1,8 @@
+ci:
+ # load, transform, and create data sequences from committee info
+ url: https://whimsy.apache.org/public/committee-info.json
+ ci:
+ # used on /foundation/
+ description: 'Dictionary of officers and committees'
+ # save a merged dictionary version of these sequences.
+ dictionary: officers
diff --git a/tests/issue82/runtest.sh b/tests/issue82/runtest.sh
new file mode 100755
index 0000000..9b92101
--- /dev/null
+++ b/tests/issue82/runtest.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+# Runs testasfdata.py, then diffs the files it writes under out/.
+if [[ ! -d out ]]
+then
+ mkdir out
+else
+ rm -f out/*  # clear results left by a previous run
+fi
+PYTHONPATH="../.." python testasfdata.py
+# Two-section config compared against the ci-only config
+printf '%s\n' "==========================" "show that CI matches OK when using separate sections"
+diff out/asfdataboth2.out out/asfdataci.out
+# Single-section config compared against the ci-only config
+printf '%s\n' "==========================" "Show difference when using the same section"
+diff out/asfdataboth1.out out/asfdataci.out
diff --git a/tests/issue82/testasfdata.py b/tests/issue82/testasfdata.py
new file mode 100644
index 0000000..b67eb4b
--- /dev/null
+++ b/tests/issue82/testasfdata.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+
+import pprint
+import pelican
+import plugins.asfdata as asfdata
+
+settings = { # Keep pelican happy: this dict is passed to pelican.Pelican() in test()
+ 'MARKDOWN': {},
+ 'FORMATTED_FIELDS': [],
+ 'PATH': '.',
+ 'OUTPUT_PATH': 'output',
+ 'THEME': 'simple',
+ 'IGNORE_FILES': [],
+ 'DELETE_OUTPUT_DIRECTORY': False,
+ 'OUTPUT_RETENTION': 0,
+ 'ASF_DATA': {
+ 'data': 'TBA',  # placeholder; test() replaces it with the YAML file under test
+ 'metadata': {  # test() resets this to {} and reads the result back from here
+ },
+ 'debug': False,
+ }
+}
+
+def test(name):
+ """Run asfdata.config_read_data on {name}.yaml and pretty-print the metadata to out/{name}.out."""
+ source, target = f"{name}.yaml", f"out/{name}.out"
+ print(f"{source} => {target}")
+ settings['ASF_DATA']['data'] = source
+ settings['ASF_DATA']['metadata'] = {}  # start every run from empty metadata
+ asfdata.config_read_data(pelican.Pelican(settings))
+ metadata = settings['ASF_DATA']['metadata']
+ # Not interested in these for comparisons
+ metadata.pop('officers', None)
+ metadata.pop('officers_size', None)
+ # Use a context manager so the output file is flushed and closed before returning
+ with open(target, 'w') as stream:
+  pprint.PrettyPrinter(indent=2, stream=stream).pprint(metadata)
+
+test('asfdataboth1')
+test('asfdataboth2')
+test('asfdataci')
diff --git a/tests/postrun/.gitignore b/tests/postrun/.gitignore
new file mode 100644
index 0000000..65f8022
--- /dev/null
+++ b/tests/postrun/.gitignore
@@ -0,0 +1,4 @@
+site-generated/*
+temp/*
+pelican.auto.py
+pelicanconf.yaml
diff --git a/tests/postrun/content/empty.md b/tests/postrun/content/empty.md
new file mode 100644
index 0000000..027ae71
--- /dev/null
+++ b/tests/postrun/content/empty.md
@@ -0,0 +1,3 @@
+title: Dummy
+
+Dummy file
diff --git a/tests/postrun/pelicanconf1.yaml b/tests/postrun/pelicanconf1.yaml
new file mode 100644
index 0000000..73950ed
--- /dev/null
+++ b/tests/postrun/pelicanconf1.yaml
@@ -0,0 +1,16 @@
+site:
+ name: Apache Software Foundation
+ domain: test.apache.org
+ logo: None  # plain YAML scalar: loads as the string "None", not null
+ repository: None  # same: the string "None"
+ description: Test run command
+ trademarks: Apache, the Apache feather logo are trademarks
+
+theme: theme/apache
+
+plugins:
+ use: []
+
+setup:
+ run:
+ - /bin/bash run.sh  # run.sh copies temp/date1.txt to temp/date1.tmp
diff --git a/tests/postrun/pelicanconf2.yaml b/tests/postrun/pelicanconf2.yaml
new file mode 100644
index 0000000..9c21ba7
--- /dev/null
+++ b/tests/postrun/pelicanconf2.yaml
@@ -0,0 +1,16 @@
+site:
+ name: Apache Software Foundation
+ domain: test.apache.org
+ logo: None  # plain YAML scalar: loads as the string "None", not null
+ repository: None  # same: the string "None"
+ description: Test run command
+ trademarks: Apache, the Apache feather logo are trademarks
+
+theme: theme/apache
+
+plugins:
+ use: []
+
+setup:
+ postrun:
+ - /bin/bash postrun.sh  # postrun.sh copies temp/date2.txt to temp/date2.tmp
diff --git a/tests/postrun/pelicanconf3.yaml b/tests/postrun/pelicanconf3.yaml
new file mode 100644
index 0000000..86f028b
--- /dev/null
+++ b/tests/postrun/pelicanconf3.yaml
@@ -0,0 +1,18 @@
+site:
+ name: Apache Software Foundation
+ domain: test.apache.org
+ logo: None  # plain YAML scalar: loads as the string "None", not null
+ repository: None  # same: the string "None"
+ description: Test run command
+ trademarks: Apache, the Apache feather logo are trademarks
+
+theme: theme/apache
+
+plugins:
+ use: []
+
+setup:
+ run:
+ - /bin/bash run.sh  # run.sh copies temp/date1.txt to temp/date1.tmp
+ postrun:
+ - /bin/bash postrun.sh  # postrun.sh copies temp/date2.txt to temp/date2.tmp
diff --git a/tests/postrun/postrun.sh b/tests/postrun/postrun.sh
new file mode 100755
index 0000000..a0ff421
--- /dev/null
+++ b/tests/postrun/postrun.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+# Script to test postrun command: copies temp/date2.txt so runtest.sh can tell that it ran
+# Stop if temp/ is missing rather than running cp in the wrong directory
+cd temp || exit 1
+cp date2.txt date2.tmp
+# Fail when PELICAN_OUTPUT_PATH is unset or empty
+if [ -z "$PELICAN_OUTPUT_PATH" ]
+then
+ echo "PELICAN_OUTPUT_PATH is not defined!"
+ exit 1
+fi
diff --git a/tests/postrun/run.sh b/tests/postrun/run.sh
new file mode 100755
index 0000000..76cef6a
--- /dev/null
+++ b/tests/postrun/run.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# Script to test run command: copies temp/date1.txt so runtest.sh can tell that it ran
+# Stop if temp/ is missing rather than running cp in the wrong directory
+cd temp || exit 1
+cp date1.txt date1.tmp
diff --git a/tests/postrun/runtest.sh b/tests/postrun/runtest.sh
new file mode 100755
index 0000000..69eb2a8
--- /dev/null
+++ b/tests/postrun/runtest.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+# Script to set up and run a test
+
+setup() {
+ # $1: the pelicanconf variant to install as pelicanconf.yaml
+ local conf="$1"
+ # Remove output, config and scratch files left by an earlier run
+ rm -rf site-generated temp
+ rm -f pelicanconf.yaml pelican.auto.py
+ # Recreate the scratch files that run.sh / postrun.sh copy to date1.tmp / date2.tmp
+ mkdir temp
+ date >temp/date1.txt
+ date >temp/date2.txt
+ # Install the requested config, then build
+ cp "$conf" pelicanconf.yaml
+ ../../bin/buildsite.py dir
+ # Show the ASF_RUN / ASF_POSTRUN lines of pelican.auto.py (presumably written by buildsite.py)
+ grep -E 'ASF_RUN|ASF_POSTRUN' pelican.auto.py
+
+}
+
+export LIBCMARKDIR=Dummy # needed even if gfm not loaded
+# Runs setup with each pelicanconfN.yaml and checks which date*.tmp copies were produced.
+#=============================
+# Every cmp aborts on mismatch so that a failed check is reflected in the exit status
+setup pelicanconf1.yaml # This tests run script
+
+echo "date1.tmp should exist as copy of date1.txt"
+cmp temp/date1.txt temp/date1.tmp || exit 1
+echo "date2.tmp should not exist"
+test -f temp/date2.tmp && { echo "date2.tmp is present!"; exit 1; }
+
+#=============================
+
+setup pelicanconf2.yaml # This tests postrun script
+
+echo "date2.tmp should exist as copy of date2.txt"
+cmp temp/date2.txt temp/date2.tmp || exit 1
+echo "date1.tmp should not exist"
+test -f temp/date1.tmp && { echo "date1.tmp is present!"; exit 1; }
+
+#=============================
+
+setup pelicanconf3.yaml # This tests run and postrun script
+
+echo "date1.tmp should exist as copy of date1.txt"
+cmp temp/date1.txt temp/date1.tmp || exit 1
+echo "date2.tmp should exist as copy of date2.txt"
+cmp temp/date2.txt temp/date2.tmp || exit 1
+
+#=============================
diff --git a/tests/postrun/theme/apache/base.html b/tests/postrun/theme/apache/base.html
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/postrun/theme/apache/base.html