blob: 5e4c42c73efdcac03668b5fe03404e1d93f86e33 [file] [log] [blame]
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import hashlib
import os
import re
import sys
import time
import click
import pygit2
import github3
import gnupg
from io import StringIO
from pathlib import Path
from textwrap import dedent
from jinja2 import Template, StrictUndefined
from setuptools_scm import get_version
from ruamel.yaml import YAML
# absolute directory containing this script; used to resolve the CI
# templates and the default config/queue paths below
CWD = Path(__file__).parent.absolute()
class GitRemoteCallbacks(pygit2.RemoteCallbacks):
    """Remote callbacks that authenticate pushes with a GitHub OAuth token."""

    def __init__(self, token):
        self.token = token
        self.attempts = 0
        super().__init__()

    def push_update_reference(self, refname, message):
        # returning None signals success to libgit2
        pass

    def update_tips(self, refname, old, new):
        pass

    def credentials(self, url, username_from_url, allowed_types):
        # libgit2 retries a failing authentication indefinitely (known bug),
        # so give up after a fixed number of attempts
        self.attempts += 1
        if self.attempts >= 5:
            # pygit2 doesn't propagate the exception properly
            msg = 'Wrong oauth personal access token'
            print(msg)
            raise ValueError(msg)

        plaintext = pygit2.credentials.GIT_CREDTYPE_USERPASS_PLAINTEXT
        if allowed_types & plaintext:
            # the token acts as the username, paired with a dummy password
            return pygit2.UserPass(self.token, 'x-oauth-basic')
        return None
class Repo:
    """Base class for interaction with local git repositories

    A high level wrapper used for both reading revision information from
    arrow's repository and pushing continuous integration tasks to the queue
    repository.

    Parameters
    ----------
    path : str or Path
        Path of the local git clone.
    github_token : str, default None
        GitHub OAuth personal access token, used for authenticating pushes
        and GitHub API calls.
    """

    def __init__(self, path, github_token=None):
        self.path = Path(path)
        self.repo = pygit2.Repository(str(self.path))
        self.github_token = github_token
        # branch and tag references created locally but not yet pushed
        self._updated_refs = []

    def __str__(self):
        tpl = dedent('''
            Repo: {remote}@{branch}
            Commit: {head}
        ''')
        return tpl.format(
            remote=self.remote_url,
            branch=self.branch.branch_name,
            head=self.head
        )

    @property
    def origin(self):
        """The remote named 'origin'."""
        return self.repo.remotes['origin']

    def fetch(self):
        """Fetch all branch heads from origin."""
        refspec = '+refs/heads/*:refs/remotes/origin/*'
        self.origin.fetch([refspec])

    def push(self):
        """Push the accumulated references to origin and reset the list."""
        callbacks = GitRemoteCallbacks(self.github_token)
        self.origin.push(self._updated_refs, callbacks=callbacks)
        # BUG FIX: this previously assigned to `self.updated_refs`, leaving
        # `self._updated_refs` populated, so a subsequent push() would try
        # to re-push already published references
        self._updated_refs = []

    @property
    def head(self):
        """Currently checked out commit's sha"""
        # NOTE(review): this returns the HEAD reference object, not a sha
        # string; callers use `.target` for the actual commit id -- confirm
        # whether the docstring or the return value should change
        return self.repo.head

    @property
    def branch(self):
        """Currently checked out branch"""
        return self.repo.branches[self.repo.head.shorthand]

    @property
    def remote(self):
        """Currently checked out branch's remote counterpart"""
        return self.repo.remotes[self.branch.upstream.remote_name]

    @property
    def remote_url(self):
        """
        Currently checked out branch's remote counterpart URL

        If an SSH github url is set, it will be replaced by the https
        equivalent.
        """
        return self.remote.url.replace(
            'git@github.com:', 'https://github.com/')

    @property
    def email(self):
        """The user.email value from the repository's git configuration."""
        return next(self.repo.config.get_multivar('user.email'))

    @property
    def signature(self):
        """A pygit2.Signature for the configured user at the current time."""
        name = next(self.repo.config.get_multivar('user.name'))
        return pygit2.Signature(name, self.email, int(time.time()))

    def create_branch(self, branch_name, files, parents=None, message=''):
        """Create a branch holding a single commit containing `files`.

        Parameters
        ----------
        branch_name : str
            Name of the branch to create.
        files : dict
            Mapping of filename to file content.
        parents : list, default None
            Parent commit ids; defaults to an empty list (root commit).
        message : str, default ''
            Commit message.

        Returns
        -------
        pygit2.Branch
        """
        # avoid the mutable default argument pitfall of `parents=[]`
        if parents is None:
            parents = []

        # 1. create tree
        builder = self.repo.TreeBuilder()
        for filename, content in files.items():
            # insert the file and creating the new filetree
            blob_id = self.repo.create_blob(content)
            builder.insert(filename, blob_id, pygit2.GIT_FILEMODE_BLOB)
        tree_id = builder.write()

        # 2. create commit with the tree created above
        author = committer = self.signature
        commit_id = self.repo.create_commit(None, author, committer, message,
                                            tree_id, parents)
        commit = self.repo[commit_id]

        # 3. create branch pointing to the previously created commit
        branch = self.repo.create_branch(branch_name, commit)

        # append to the pushable references
        self._updated_refs.append('refs/heads/{}'.format(branch_name))

        return branch

    def create_tag(self, tag_name, commit_id, message=''):
        """Create an annotated tag pointing at `commit_id`."""
        tag_id = self.repo.create_tag(tag_name, commit_id,
                                      pygit2.GIT_OBJ_COMMIT, self.signature,
                                      message)

        # append to the pushable references
        self._updated_refs.append('refs/tags/{}'.format(tag_name))

        return self.repo[tag_id]

    def file_contents(self, commit_id, file):
        """Return the raw bytes of `file` as stored at `commit_id`."""
        commit = self.repo[commit_id]
        entry = commit.tree[file]
        blob = self.repo[entry.id]
        return blob.data

    def _parse_github_user_repo(self):
        # raw string: the previous non-raw pattern contained invalid escape
        # sequences ('\/', '\.') which raise DeprecationWarnings on modern
        # python versions
        m = re.match(r'.*\/([^\/]+)\/([^\/\.]+)(\.git)?$', self.remote_url)
        user, repo = m.group(1), m.group(2)
        return user, repo

    def as_github_repo(self):
        """Converts it to a repository object which wraps the GitHub API"""
        username, reponame = self._parse_github_user_repo()
        gh = github3.login(token=self.github_token)
        return gh.repository(username, reponame)
class Queue(Repo):
    """Git repository used as a task queue

    Each submitted job is stored as a branch containing a job.yml
    description, and each of its tasks as a separate branch (plus a tag)
    holding the rendered CI configuration.
    """

    def _next_job_id(self, prefix):
        """Auto increments the branch's identifier based on the prefix"""
        # raw string: avoids invalid escape sequence warnings for '\w',
        # '\/' and '\d' on modern python versions
        pattern = re.compile(r'[\w\/-]*{}-(\d+)'.format(prefix))
        matches = list(filter(None, map(pattern.match, self.repo.branches)))
        if matches:
            latest = max(int(m.group(1)) for m in matches)
        else:
            latest = 0
        return '{}-{}'.format(prefix, latest + 1)

    def get(self, job_name):
        """Load a previously submitted Job from its branch's job.yml."""
        branch_name = 'origin/{}'.format(job_name)
        branch = self.repo.branches[branch_name]
        content = self.file_contents(branch.target, 'job.yml')
        buffer = StringIO(content.decode('utf-8'))
        return yaml.load(buffer)

    def put(self, job, prefix='build'):
        """Render and commit a job and all of its tasks as local branches.

        The branches are only created locally; call push() to publish them.
        """
        # TODO(kszucs): more verbose error handling
        assert isinstance(job, Job)
        assert job.branch is None
        assert len(job.tasks) > 0

        # auto increment and set next job id, e.g. build-85
        job.branch = self._next_job_id(prefix)

        # create tasks' branches
        for task_name, task in job.tasks.items():
            task.branch = '{}-{}'.format(job.branch, task_name)
            files = task.render_files(job=job, arrow=job.target)
            branch = self.create_branch(task.branch, files=files)
            self.create_tag(task.tag, branch.target)
            task.commit = str(branch.target)

        # create job's branch with its description
        return self.create_branch(job.branch, files=job.render_files())

    def github_statuses(self, job):
        """Query the GitHub combined status of every task's commit."""
        repo = self.as_github_repo()
        return {name: repo.commit(task.commit).status()
                for name, task in job.tasks.items()}

    def github_assets(self, task):
        """Map the task's artifact patterns to its uploaded release assets.

        Returns an empty dict if no GitHub release exists for the task's
        tag yet.

        Raises
        ------
        ValueError
            If an artifact pattern matches more than one asset.
        """
        repo = self.as_github_repo()
        try:
            release = repo.release_from_tag(task.tag)
        except github3.exceptions.NotFoundError:
            return {}

        assets = {a.name: a for a in release.assets()}
        artifacts = {}
        for artifact in task.artifacts:
            # artifact can be a regex pattern
            pattern = re.compile(artifact)
            matches = list(filter(None, map(pattern.match, assets.keys())))
            num_matches = len(matches)

            # validate artifact pattern matches single asset
            if num_matches > 1:
                # BUG FIX: the format string has two placeholders but was
                # given a single argument, and joined match objects instead
                # of strings -- raising it previously crashed with an
                # unrelated error
                raise ValueError(
                    'Only a single asset should match pattern `{}`, there are '
                    'multiple ones: {}'.format(
                        artifact, ', '.join(m.group(0) for m in matches)
                    )
                )
            elif num_matches == 1:
                artifacts[artifact] = assets[matches[0].group(0)]

        return artifacts

    def upload_assets(self, job, files, content_type):
        """Upload local files as release assets, replacing same-named ones."""
        repo = self.as_github_repo()
        release = repo.release_from_tag(job.branch)
        assets = {a.name: a for a in release.assets()}
        for path in files:
            if path.name in assets:
                # remove already uploaded asset
                assets[path.name].delete()
            with path.open('rb') as fp:
                release.upload_asset(name=path.name, asset=fp,
                                     content_type=content_type)
class Target:
    """Describes target repository and revision the builds run against

    This serializable data container holding information about arrow's
    git remote, branch, sha and version number as well as some metadata
    (currently only an email address where the notification should be sent).
    """

    def __init__(self, head, branch, remote, version, email=None):
        self.head = head
        self.branch = branch
        self.remote = remote
        self.version = version
        self.email = email

    @classmethod
    def from_repo(cls, repo):
        """Build a Target from a Repo's currently checked out state."""
        assert isinstance(repo, Repo)
        # drop the local part of the scm version (e.g. '+dirty' suffixes)
        version = get_version(repo.path, local_scheme=lambda v: '')
        return cls(head=str(repo.head.target),
                   branch=repo.branch.branch_name,
                   remote=repo.remote_url,
                   version=version,
                   email=repo.email)
class Task:
    """Describes a build task and metadata required to render CI templates

    A task is represented as a single git commit and branch containing jinja2
    rendered files (currently appveyor.yml or .travis.yml configurations).

    A task can't be directly submitted to a queue, must belong to a job.
    Each task's unique identifier is its branch name, which is generated after
    submitting the job to a queue.
    """

    def __init__(self, platform, template, artifacts=None, params=None):
        assert platform in {'win', 'osx', 'linux'}
        self.platform = platform
        self.template = template
        self.artifacts = artifacts or []
        self.params = params or {}
        # both are filled in once the task is added to a queue
        self.branch = None
        self.commit = None

    def render_files(self, **extra_params):
        """Render the jinja2 template into the task's CI configuration."""
        source = (CWD / self.template).read_text()
        template = Template(source, undefined=StrictUndefined)
        rendered = template.render(task=self, **self.params, **extra_params)
        return {self.filename: rendered}

    @property
    def tag(self):
        # tags mirror the branch names
        return self.branch

    @property
    def ci(self):
        # appveyor covers windows, travis everything else
        return 'appveyor' if self.platform == 'win' else 'travis'

    @property
    def filename(self):
        return 'appveyor.yml' if self.ci == 'appveyor' else '.travis.yml'
class Job:
    """Describes multiple tasks against a single target repository"""

    def __init__(self, target, tasks):
        assert isinstance(target, Target)
        assert all(isinstance(task, Task) for task in tasks.values())
        self.target = target
        self.tasks = tasks
        self.branch = None  # filled after adding to a queue

    def render_files(self):
        """Serialize this job to yaml, keyed by its storage filename."""
        buf = StringIO()
        yaml.dump(self, buf)
        return {'job.yml': buf.getvalue()}

    @property
    def email(self):
        """Notification address; CROSSBOW_EMAIL overrides the target's."""
        return os.environ.get('CROSSBOW_EMAIL', self.target.email)
# configure yaml serializer
# register the data containers so they round-trip through job.yml
yaml = YAML()
yaml.register_class(Job)
yaml.register_class(Task)
yaml.register_class(Target)

# state color mapping to highlight console output
COLORS = {'ok': 'green',
          'error': 'red',
          'missing': 'red',
          'failure': 'red',
          'pending': 'yellow',
          'success': 'green'}

# define default paths
DEFAULT_CONFIG_PATH = CWD / 'tasks.yml'
# assumes this script lives two levels below the arrow repository root,
# with the crossbow queue clone placed next to arrow
DEFAULT_ARROW_PATH = CWD.parents[1]
DEFAULT_QUEUE_PATH = CWD.parents[2] / 'crossbow'
@click.group()
@click.option('--github-token', '-t', default=None,
              help='OAuth token for GitHub authentication')
@click.option('--arrow-path', '-a',
              type=click.Path(exists=True), default=DEFAULT_ARROW_PATH,
              help='Arrow\'s repository path. Defaults to the repository of '
                   'this script')
@click.option('--queue-path', '-q',
              type=click.Path(exists=True), default=DEFAULT_QUEUE_PATH,
              help='The repository path used for scheduling the tasks. '
                   'Defaults to crossbow directory placed next to arrow')
@click.pass_context
def crossbow(ctx, github_token, arrow_path, queue_path):
    # CLI entry point: validates the token and stores the repository
    # wrappers on the click context for the subcommands.
    # NOTE: --github-token may also be supplied via the
    # CROSSBOW_GITHUB_TOKEN environment variable (auto_envvar_prefix in
    # the __main__ guard).
    if github_token is None:
        raise click.ClickException(
            'Could not determine GitHub token. Please set the '
            'CROSSBOW_GITHUB_TOKEN environment variable to a '
            'valid GitHub access token or pass one to --github-token.'
        )

    # assumes the caller seeded the context with obj={} (see __main__)
    ctx.obj['arrow'] = Repo(Path(arrow_path))
    ctx.obj['queue'] = Queue(Path(queue_path), github_token=github_token)
def load_tasks_from_config(config_path, task_names, group_names):
    """Load the configs of the requested tasks and groups from tasks.yml.

    Raises click.ClickException for unknown task or group names.
    """
    with Path(config_path).open() as fp:
        config = yaml.load(fp)

    # validate the requested group names first
    valid_groups = set(config['groups'])
    invalid_groups = set(group_names) - valid_groups
    if invalid_groups:
        raise click.ClickException('Invalid group(s) {!r}. Must be one of {!r}'
                                   .format(invalid_groups, valid_groups))

    # collect the explicitly named tasks plus every task of each group
    requested_tasks = set(task_names)
    for group in group_names:
        requested_tasks.update(config['groups'][group])

    valid_tasks = set(config['tasks'])
    invalid_tasks = requested_tasks - valid_tasks
    if invalid_tasks:
        raise click.ClickException('Invalid task(s) {!r}. Must be one of {!r}'
                                   .format(invalid_tasks, valid_tasks))

    return {name: config['tasks'][name] for name in requested_tasks}
@crossbow.command()
@click.argument('task', nargs=-1, required=False)
@click.option('--group', '-g', multiple=True,
              help='Submit task groups as defined in task.yml')
@click.option('--job-prefix', default='build',
              help='Arbitrary prefix for branch names, e.g. nightly')
@click.option('--config-path', '-c',
              type=click.Path(exists=True), default=DEFAULT_CONFIG_PATH,
              help='Task configuration yml. Defaults to tasks.yml')
@click.option('--arrow-version', '-v', default=None,
              help='Set target version explicitly')
@click.option('--dry-run/--push', default=False,
              help='Just display the rendered CI configurations without '
                   'submitting them')
@click.pass_context
def submit(ctx, task, group, job_prefix, config_path, arrow_version, dry_run):
    # Render CI configurations for the requested tasks and either display
    # them (--dry-run) or push them to the queue repository (--push).
    queue, arrow = ctx.obj['queue'], ctx.obj['arrow']

    target = Target.from_repo(arrow)
    # explicitly set arrow version
    if arrow_version:
        target.version = arrow_version

    # task and group variables are lists, containing multiple values
    tasks = {}
    task_configs = load_tasks_from_config(config_path, task, group)
    # NOTE(review): the loop variable shadows the `task` argument; harmless
    # here because the argument is fully consumed above
    for name, task in task_configs.items():
        # replace version number and create task instance from configuration
        artifacts = task.pop('artifacts', None) or []  # because of yaml
        artifacts = [fn.format(version=target.version) for fn in artifacts]
        tasks[name] = Task(**task, artifacts=artifacts)

    # create job instance, doesn't mutate git data yet
    job = Job(target=target, tasks=tasks)

    if dry_run:
        # print the job description and every rendered CI file to stdout
        yaml.dump(job, sys.stdout)
        delimiter = '-' * 79
        for task_name, task in job.tasks.items():
            files = task.render_files(job=job, arrow=job.target)
            for filename, content in files.items():
                click.echo('\n\n')
                click.echo(delimiter)
                click.echo('{:<29}{:>50}'.format(task_name, filename))
                click.echo(delimiter)
                click.echo(content)
    else:
        # commit the rendered files to the queue repository and push them,
        # which triggers the CI builds
        queue.fetch()
        queue.put(job, prefix=job_prefix)
        queue.push()
        yaml.dump(job, sys.stdout)
        click.echo('Pushed job identifier is: `{}`'.format(job.branch))
@crossbow.command()
@click.argument('job-name', required=True)
@click.pass_context
def status(ctx, job_name):
    # Print a colored per-task report of CI build statuses and uploaded
    # artifacts for a previously submitted job.
    queue = ctx.obj['queue']
    queue.fetch()

    tpl = '[{:>7}] {:<49} {:>20}'
    header = tpl.format('status', 'branch', 'artifacts')
    click.echo(header)
    click.echo('-' * len(header))

    job = queue.get(job_name)
    statuses = queue.github_statuses(job)
    for task_name, task in sorted(job.tasks.items()):
        status = statuses[task_name]
        assets = queue.github_assets(task)

        # count how many of the expected artifacts are already uploaded
        uploaded = 'uploaded {} / {}'.format(
            sum(a in assets for a in task.artifacts),
            len(task.artifacts)
        )
        leadline = tpl.format(status.state.upper(), task.branch, uploaded)
        click.echo(click.style(leadline, fg=COLORS[status.state]))

        for artifact in task.artifacts:
            try:
                asset = assets[artifact]
            except KeyError:
                # an absent asset is only 'missing' once the build has
                # finished; while still pending it may show up later
                state = 'pending' if status.state == 'pending' else 'missing'
                filename = '{:>70} '.format(artifact)
            else:
                state = 'ok'
                filename = '{:>70} '.format(asset.name)
            statemsg = '[{:>7}]'.format(state.upper())
            click.echo(filename + click.style(statemsg, fg=COLORS[state]))
def hashbytes(bytes, algoname):
    """Hash `bytes` using the algorithm named `algoname`.

    Parameters
    ----------
    bytes : bytes
        The bytes to hash
    algoname : str
        The name of class in the hashlib standard library module

    Returns
    -------
    str
        Hexadecimal digest of `bytes` hashed using `algoname`

    Raises
    ------
    ValueError
        If `algoname` is not a hash algorithm known to hashlib.
    """
    # NOTE(review): the parameter name shadows the `bytes` builtin; kept
    # as-is to stay keyword-compatible with existing callers.
    # hashlib.new() accepts every algorithm name the library supports
    # (including OpenSSL-provided ones that are not module attributes) and
    # raises a descriptive ValueError for unknown names, instead of the
    # AttributeError the previous getattr() lookup produced.
    return hashlib.new(algoname, bytes).hexdigest()
@crossbow.command()
@click.argument('job-name', required=True)
@click.option('-g', '--gpg-homedir', default=None,
              type=click.Path(exists=True, file_okay=False, dir_okay=True),
              help=('Full pathname to directory containing the public and '
                    'private keyrings. Default is whatever GnuPG defaults to'))
@click.option('-t', '--target-dir', default=DEFAULT_ARROW_PATH / 'packages',
              type=click.Path(file_okay=False, dir_okay=True),
              help='Directory to download the build artifacts')
@click.option('-a', '--algorithm',
              default=['sha1', 'sha256'],
              show_default=True,
              type=click.Choice(sorted(hashlib.algorithms_guaranteed)),
              multiple=True,
              help=('Algorithm(s) used to generate checksums. Pass multiple '
                    'algorithms by passing -a/--algorithm multiple times'))
@click.pass_context
def sign(ctx, job_name, gpg_homedir, target_dir, algorithm):
    """Download and sign build artifacts from github releases"""
    gpg = gnupg.GPG(gnupghome=gpg_homedir)

    # fetch the queue repository
    queue = ctx.obj['queue']
    queue.fetch()

    # query the job's artifacts
    job = queue.get(job_name)

    # artifacts are grouped per job under <target-dir>/<job-name>/<task-name>
    target_dir = Path(target_dir).absolute() / job_name
    target_dir.mkdir(parents=True, exist_ok=True)
    click.echo('Download {}\'s artifacts to {}'.format(job_name, target_dir))

    tpl = '{:<10} {:>73}'

    task_items = sorted(job.tasks.items())
    ntasks = len(task_items)

    for i, (task_name, task) in enumerate(task_items, start=1):
        assets = queue.github_assets(task)

        artifact_dir = target_dir / task_name
        artifact_dir.mkdir(exist_ok=True)

        basemsg = 'Downloading and signing assets for task {}'.format(
            click.style(task_name, bold=True)
        )
        # right-align the i/ntasks counter to a 89-wide line; the size
        # correction compensates for the invisible ANSI escape codes that
        # click.style wraps around the styled fragments
        click.echo(
            '\n{} {:>{size}}' .format(
                basemsg,
                click.style('{}/{}'.format(i, ntasks), bold=True),
                size=89 - (len(basemsg) + 1) + 2 * len(
                    click.style('', bold=True))
            )
        )
        click.echo('-' * 89)

        for artifact in task.artifacts:
            try:
                asset = assets[artifact]
            except KeyError:
                # expected asset was never uploaded for this task
                msg = click.style('[{:>13}]'.format('MISSING'),
                                  fg=COLORS['missing'])
                click.echo(tpl.format(msg, artifact))
            else:
                click.echo(click.style(artifact, bold=True))

                # download artifact
                artifact_path = artifact_dir / asset.name
                asset.download(artifact_path)

                # sign the artifact
                # NOTE(review): the result of sign_file is not checked, so
                # a failed signing (e.g. missing private key) would pass
                # silently -- confirm whether that is intended
                signature_path = Path(str(artifact_path) + '.asc')
                with artifact_path.open('rb') as fp:
                    gpg.sign_file(fp, detach=True, clearsign=False,
                                  binary=False,
                                  output=str(signature_path))

                # compute checksums for the artifact
                artifact_bytes = artifact_path.read_bytes()
                for algo in algorithm:
                    suffix = '.{}'.format(algo)
                    checksum_path = Path(str(artifact_path) + suffix)
                    checksum = '{} {}'.format(
                        hashbytes(artifact_bytes, algo), artifact_path.name
                    )
                    checksum_path.write_text(checksum)
                    msg = click.style(
                        '[{:>13}]'.format('{} HASHED'.format(algo)),
                        fg='blue'
                    )
                    click.echo(tpl.format(msg, checksum_path.name))

                msg = click.style('[{:>13}]'.format('SIGNED'), fg=COLORS['ok'])
                click.echo(tpl.format(msg, str(signature_path.name)))
if __name__ == '__main__':
    # obj={} seeds click's shared context object; auto_envvar_prefix lets
    # every option also be supplied as a CROSSBOW_* environment variable
    crossbow(obj={}, auto_envvar_prefix='CROSSBOW')