#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Simple script to set up jekyll for building the site.
# This may evolve to have other handy tools.
from __future__ import print_function
import os
import logging
import re
import shutil
import subprocess
import sys
import textwrap
# Absolute path of the directory containing this script. The other local
# paths below are derived from it.
SITE_ROOT = os.path.abspath(os.path.dirname(__file__))
# Local gem directory: used as the "gem install --install-dir" target and
# exported as $GEM_PATH by setup_bundle().
GEM_PATH = os.path.join(SITE_ROOT, ".gem")
# Path of the bundler executable inside the local gem directory.
BUNDLE_PATH = os.path.join(GEM_PATH, "bin", "bundle")
# Import helpers to support Python 2.6 (CentOS 6).
sys.path.append(os.path.join(SITE_ROOT, "_tools"))
from kudu_util import check_output
# The repository and branch where we version the actual site source (jekyll
# code, posts, etc)
SITE_REPO = "https://gitbox.apache.org/repos/asf/kudu.git"
SITE_BRANCH = "gh-pages"
# The repository and branch from which we publish the official ASF site.
# Changes here are immediately visible to the world.
PUBLISH_REPO = "https://gitbox.apache.org/repos/asf/kudu-site.git"
PUBLISH_BRANCH = "asf-site"
# The local directory in which we check out PUBLISH_REPO and build the site.
PUBLISH_DIR = os.path.join(SITE_ROOT, "_publish")
# A tag that ends up in the git commit messages in PUBLISH_REPO, indicating
# the revision within SITE_REPO that corresponds to each commit.
SITE_COMMIT_TAG = "Site-Repo-Commit"
def eprint(message):
    """ Print `message`, followed by a newline, to standard error. """
    err_stream = sys.stderr
    print(message, file=err_stream)
def usage_and_die():
    """
    Prints the usage summary for this tool to stderr and exits the
    process with status 1. Never returns.
    """
    # Fixed: the first usage line used to end with a stray '"' character
    # that was printed verbatim.
    usage = """
usage: %(prog)s <tool> ...
%(prog)s jekyll ...
Runs jekyll with the given arguments. For example:
%(prog)s jekyll serve
%(prog)s jekyll build
%(prog)s proof
Runs htmlproof to check for broken links, etc.
%(prog)s publish [--no-verify-source]
Checks out the destination (i.e. static HTML) repository into
_publish, builds the current version of the site, and creates
a git commit with the results.
""" % dict(prog=sys.argv[0])
    # Drop the leading/trailing newlines that the triple-quoted literal adds.
    usage = usage.strip()
    eprint(usage)
    sys.exit(1)
def setup_bundle():
    """
    Installs bundler in the local .gem directory, and uses it to
    install the necessary gems.

    Sets the $GEM_PATH environment variable so that subsequent
    subprocess calls can find our gems.

    The bundler commands are run with SITE_ROOT as their working
    directory (the same directory run_jekyll() uses), so the result does
    not depend on where the tool was invoked from.

    Exits the process with status 1 if the 'bundler' gem cannot be
    installed. Raises subprocess.CalledProcessError if configuring
    bundler or installing the gems fails.
    """
    if not os.path.exists(BUNDLE_PATH):
        try:
            subprocess.check_call(
                ["gem", "install", "--no-document", "-q",
                 "--install-dir", GEM_PATH, "bundler"])
        except (OSError, subprocess.CalledProcessError) as e:
            # OSError: the 'gem' executable could not be launched.
            # CalledProcessError: 'gem install' exited with a non-zero status.
            logging.fatal("Could not install 'bundler' gem: " + str(e) + "\n" +
                          "Is the 'gem' tool installed?")
            sys.exit(1)

    os.environ['GEM_PATH'] = GEM_PATH
    # 'bundle check' exits non-zero when the required gems are not all
    # available, in which case they get installed under GEM_PATH.
    if subprocess.call([BUNDLE_PATH, "check"], cwd=SITE_ROOT) != 0:
        logging.info("Installing gems locally...")
        subprocess.check_call(
            [BUNDLE_PATH, "config", "set", "--local", "path", GEM_PATH],
            cwd=SITE_ROOT)
        subprocess.check_call([BUNDLE_PATH, "install"], cwd=SITE_ROOT)
def run_jekyll(args):
    """
    Runs 'bundle exec jekyll' with the given arguments from SITE_ROOT,
    after making sure bundler and the gems are set up.

    args: sequence of command-line arguments to pass to jekyll.

    Returns jekyll's exit status.
    """
    setup_bundle()
    # Use the shared BUNDLE_PATH constant (the same path the other functions
    # use) and accept any sequence of arguments, not just a list.
    return subprocess.call(
        [BUNDLE_PATH, "exec", "jekyll"] + list(args),
        cwd=SITE_ROOT)
def proof(args):
    """
    Run htmlproof to check for broken links or HTML.

    args: extra command-line arguments; none are accepted, so a non-empty
          list prints the usage message and exits.

    Returns 1 if the site could not be built, otherwise htmlproofer's
    exit status.
    """
    if len(args) != 0:
        usage_and_die()
    if run_jekyll(['build']) != 0:
        eprint("Could not build site!")
        return 1
    return subprocess.call(
        [BUNDLE_PATH, "exec", 'htmlproofer',
         # Don't check the javadocs, old doc versions, or the configuration
         # reference files which are extremely large.
         '--file-ignore', '/apidocs|releases|configuration_ref|cpp-client-api/',
         # Don't check external links (too slow!)
         '--disable-external',
         # Don't be pedantic about having an alt tag on every image
         '--alt-ignore', '/./',
         '_site/'],
        # '_site/' is relative: resolve it against the directory jekyll was
        # run in rather than whatever directory the tool was started from.
        cwd=SITE_ROOT)
def _source_repo_is_publishable(source_sha, upstream_sha):
    """
    Returns True if the source checkout matches the upstream branch and has
    no untracked or modified files. Otherwise prints the reason to stderr
    and returns False. Expects the current directory to be the source repo.
    """
    if source_sha != upstream_sha:
        eprint("")
        eprint(textwrap.fill(textwrap.dedent("""\
            Local HEAD (%(source_sha)s) in source repository %(site_root)s
            does not match upstream repo branch %(site_branch)s (%(upstream_sha)s).
            Maybe you have some locally committed changes that have not
            been committed upstream, or you need to rebase on origin/%(site_branch)s
            """ % dict(source_sha=source_sha[:8],
                       site_root=SITE_ROOT,
                       site_branch=SITE_BRANCH,
                       upstream_sha=upstream_sha[:8]))))
        return False

    status = check_output(["git", "status", "--porcelain"])
    if status.strip() != "":
        eprint("Source dir has untracked or modified files")
        eprint(status)
        return False
    return True


def _prepare_publish_checkout():
    """
    Makes PUBLISH_DIR a clean, up-to-date checkout of the published repo and
    changes the current directory to it. Returns True on success; otherwise
    prints the reason to stderr and returns False.
    """
    # Clone the repo to publish into if it's not already there.
    if not os.path.exists(PUBLISH_DIR):
        logging.info("Cloning git repo for publishing...")
        subprocess.check_call(
            ["git", "clone", "-b", PUBLISH_BRANCH, PUBLISH_REPO, PUBLISH_DIR])
    if not os.path.exists(os.path.join(PUBLISH_DIR, ".git")):
        eprint("%s does not appear to be a git repo." % (PUBLISH_DIR,))
        eprint("Remove it and try again")
        return False

    os.chdir(PUBLISH_DIR)
    # Check that the directory to publish is not dirty and has no untracked files.
    status = check_output(["git", "status", "--porcelain"])
    if status.strip() != "":
        eprint("Build dir %s has untracked or modified files:" % (PUBLISH_DIR,))
        eprint(status)
        return False

    # Update the repo to the latest from upstream.
    logging.info("Pulling latest published site repo...")
    if subprocess.call(["git", "pull", "--ff-only"]) != 0:
        eprint("Unable to pull latest published site.")
        return False
    return True


def _build_commit_message(source_sha):
    """
    Builds the commit message for publishing source revision `source_sha`,
    or returns None if that revision is already the last published one.
    Expects the current directory to be the published repo checkout.
    """
    # Determine the hash of the last published commit from the source repo,
    # based on the published repo's git log. Generate a git commit message so
    # that the published repository has a sensible looking history.
    #
    # Each commit in the destination repo is tagged with the source repo's hash,
    # so that we can easily figure out which commits have been published.
    last_publish_log = check_output(
        ["git", "log", "--grep=^%s: " % SITE_COMMIT_TAG, "-n1"])
    if last_publish_log == "":
        commit_msg = "Initial import from site repo"
    else:
        m = re.search(r'^\s*%s: (.+)$' % SITE_COMMIT_TAG, last_publish_log,
                      re.MULTILINE)
        if not m:
            raise Exception("Unexpected git log: %s" % last_publish_log)
        # Trim stray whitespace so the comparison below is not fooled by it.
        last_publish_sha = m.group(1).strip()
        if last_publish_sha == source_sha:
            return None
        # List the source commits made since the last publish; this log is
        # taken from the source repo, not the published one.
        source_log = check_output(
            ["git", "log", "--pretty=oneline", "--abbrev-commit",
             "%s..HEAD" % last_publish_sha],
            cwd=SITE_ROOT)
        commit_msg = (
            "Publish commit(s) from site source repo:\n" +
            "\n".join(" " + l for l in source_log.splitlines()))
    return commit_msg + "\n\n%s: %s" % (SITE_COMMIT_TAG, source_sha)


def publish(args):
    """
    Publish the site to the public ASF repository.

    Builds the current site source into the PUBLISH_DIR checkout and
    creates a local commit there; pushing is left to the user.

    args: command-line arguments; "--no-verify-source" skips the checks
          that the source checkout matches upstream and is clean.

    Returns 0 on success and 1 if publishing was refused or the build
    failed. Raises subprocess.CalledProcessError if one of the git
    commands run via check_call fails.
    """
    os.chdir(SITE_ROOT)

    # Check that the site source repo has a commit which has been
    # pushed (don't let people publish uncommitted work)
    source_sha = check_output(
        ["git", "rev-parse", "HEAD"]).strip()
    subprocess.check_call(
        ["git", "fetch", SITE_REPO, SITE_BRANCH])
    upstream_sha = check_output(
        ["git", "rev-parse", "FETCH_HEAD"]).strip()
    if "--no-verify-source" not in args:
        if not _source_repo_is_publishable(source_sha, upstream_sha):
            return 1

    if not _prepare_publish_checkout():
        return 1

    commit_msg = _build_commit_message(source_sha)
    if commit_msg is None:
        eprint("The site has already been published as of %s" % (source_sha,))
        return 1

    # Remove all of the files currently in the repo before re-building.
    # This ensures that, if the source repo has removed a file, it will
    # also get removed in the output. --ignore-unmatch keeps this from
    # failing when the checkout has no tracked files yet.
    subprocess.check_call(
        ["git", "rm", "-r", "--quiet", "--ignore-unmatch", "*"])

    # Rebuild using Jekyll.
    success = False
    try:
        if run_jekyll(['build', "--destination", PUBLISH_DIR]) != 0:
            eprint("Could not build site!")
            return 1
        success = True
    finally:
        # If jekyll failed for some reason, we want to restore the old state
        # of the repository before exiting.
        if not success:
            subprocess.check_call(["git", "reset", "--hard"])

    subprocess.check_call(["git", "add", "*"])
    # Create a new commit in the published repo
    subprocess.check_call(["git", "commit", "-a", "-m", commit_msg])
    print("Success!")
    subprocess.call(["git", "show", "--stat"])
    print()
    print("To push:")
    print(" cd %s && git push" % (PUBLISH_DIR,))
    return 0
def main(argv):
    """
    Command-line entry point: dispatches to the tool named by argv[0].

    argv: the command-line arguments, without the program name.

    Exits the process with the selected tool's status. Prints the usage
    message and exits with status 1 if no tool is given, the tool is
    unknown, or '--help' is passed in place of a tool.
    """
    logging.basicConfig(level=logging.INFO)
    if len(argv) == 0:
        usage_and_die()
    tool = argv[0]
    tool_args = argv[1:]
    if tool == 'jekyll':
        sys.exit(run_jekyll(tool_args))
    elif tool == 'proof':
        # Propagate proof's result so a failed check produces a non-zero exit
        # status, like the other tools.
        sys.exit(proof(tool_args))
    elif tool == 'publish':
        sys.exit(publish(tool_args))
    elif '--help' in argv:
        usage_and_die()
    else:
        eprint("Unknown tool: %s" % (tool,))
        usage_and_die()
# Script entry point: pass everything after the program name to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)