ARROW-12263: [Dev][Packaging] Move Crossbow to Archery
Main highlights:
- prepared for unit testing
- introduced jinja macros to reduce code duplication for crossbow related snippets, like arrow checkout or release uploading
- the tasks.yml is jinja templated now
Closes #9913 from kszucs/crossbow-to-archery
Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index 7dd75b5..761e045 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -53,8 +53,7 @@
with:
python-version: '3.6'
- name: Install Archery, Crossbow- and Test Dependencies
- working-directory: dev/archery
- run: pip install pytest responses toolz jinja2 -e .[all]
+ run: pip install pytest responses -e dev/archery[all]
- name: Archery Unittests
working-directory: dev/archery
run: pytest -v archery
@@ -62,4 +61,4 @@
run: archery docker
- name: Crossbow Check Config
working-directory: dev/tasks
- run: python crossbow.py check-config
+ run: archery crossbow check-config
diff --git a/dev/archery/archery/bot.py b/dev/archery/archery/bot.py
index 54a57f2..35b69a6 100644
--- a/dev/archery/archery/bot.py
+++ b/dev/archery/archery/bot.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-import operator
+import os
import shlex
from pathlib import Path
from functools import partial
@@ -24,8 +24,9 @@
import click
import github
-from .utils.crossbow import Crossbow
-from .utils.git import Git
+from .utils.git import git
+from .utils.logger import logger
+from .crossbow import Repo, Queue, Config, Target, Job, CommentReport
class EventError(Exception):
@@ -81,86 +82,6 @@
group = partial(click.group, cls=Group)
-class CrossbowCommentFormatter:
-
- _markdown_badge = '[![{title}]({badge})]({url})'
-
- badges = {
- 'github': _markdown_badge.format(
- title='Github Actions',
- url='https://github.com/{repo}/actions?query=branch:{branch}',
- badge=(
- 'https://github.com/{repo}/workflows/Crossbow/'
- 'badge.svg?branch={branch}'
- ),
- ),
- 'azure': _markdown_badge.format(
- title='Azure',
- url=(
- 'https://dev.azure.com/{repo}/_build/latest'
- '?definitionId=1&branchName={branch}'
- ),
- badge=(
- 'https://dev.azure.com/{repo}/_apis/build/status/'
- '{repo_dotted}?branchName={branch}'
- )
- ),
- 'travis': _markdown_badge.format(
- title='TravisCI',
- url='https://travis-ci.com/{repo}/branches',
- badge='https://img.shields.io/travis/{repo}/{branch}.svg'
- ),
- 'circle': _markdown_badge.format(
- title='CircleCI',
- url='https://circleci.com/gh/{repo}/tree/{branch}',
- badge=(
- 'https://img.shields.io/circleci/build/github'
- '/{repo}/{branch}.svg'
- )
- ),
- 'appveyor': _markdown_badge.format(
- title='Appveyor',
- url='https://ci.appveyor.com/project/{repo}/history',
- badge='https://img.shields.io/appveyor/ci/{repo}/{branch}.svg'
- ),
- 'drone': _markdown_badge.format(
- title='Drone',
- url='https://cloud.drone.io/{repo}',
- badge='https://img.shields.io/drone/build/{repo}/{branch}.svg'
- ),
- }
-
- def __init__(self, crossbow_repo):
- self.crossbow_repo = crossbow_repo
-
- def render(self, job):
- url = 'https://github.com/{repo}/branches/all?query={branch}'
- sha = job['target']['head']
-
- msg = 'Revision: {}\n\n'.format(sha)
- msg += 'Submitted crossbow builds: [{repo} @ {branch}]'
- msg += '({})\n'.format(url)
- msg += '\n|Task|Status|\n|----|------|'
-
- tasks = sorted(job['tasks'].items(), key=operator.itemgetter(0))
- for key, task in tasks:
- branch = task['branch']
-
- try:
- template = self.badges[task['ci']]
- badge = template.format(
- repo=self.crossbow_repo,
- repo_dotted=self.crossbow_repo.replace('/', '.'),
- branch=branch
- )
- except KeyError:
- badge = 'unsupported CI service `{}`'.format(task['ci'])
-
- msg += '\n|{}|{}|'.format(key, badge)
-
- return msg.format(repo=self.crossbow_repo, branch=job['branch'])
-
-
class CommentBot:
def __init__(self, name, handler, token=None):
@@ -195,8 +116,7 @@
try:
command = self.parse_command(payload)
except EventError as e:
- print(e)
- # TODO(kszucs): log
+ logger.error(e)
# see the possible reasons in the validate method
return
@@ -220,14 +140,13 @@
comment = pull.get_issue_comment(payload['comment']['id'])
try:
- self.handler(command, issue=issue, pull=pull, comment=comment)
+ self.handler(command, issue=issue, pull_request=pull,
+ comment=comment)
except CommandError as e:
- # TODO(kszucs): log
- print(e)
+ logger.error(e)
pull.create_issue_comment("```\n{}\n```".format(e.message))
except Exception as e:
- # TODO(kszucs): log
- print(e)
+ logger.error(e)
comment.create_reaction('-1')
else:
comment.create_reaction('+1')
@@ -248,81 +167,94 @@
help='Crossbow repository on github to use')
@click.pass_obj
def crossbow(obj, crossbow):
- """Trigger crossbow builds for this pull request"""
+ """
+ Trigger crossbow builds for this pull request
+ """
obj['crossbow_repo'] = crossbow
+def _clone_arrow_and_crossbow(dest, crossbow_repo, pull_request):
+ """
+ Clone the repositories and initialize crossbow objects.
+
+ Parameters
+ ----------
+ dest : Path
+ Filesystem path to clone the repositories to.
+ crossbow_repo : str
+ Github repository name, like kszucs/crossbow.
+ pull_request : pygithub.PullRequest
+ Object containing information about the pull request the comment bot
+ was triggered from.
+ """
+ arrow_path = dest / 'arrow'
+ queue_path = dest / 'crossbow'
+
+ # clone arrow and checkout the pull request's branch
+ pull_request_ref = 'pull/{}/head:{}'.format(
+ pull_request.number, pull_request.head.ref
+ )
+ git.clone(pull_request.base.repo.clone_url, str(arrow_path))
+ git.fetch('origin', pull_request_ref, git_dir=arrow_path)
+ git.checkout(pull_request.head.ref, git_dir=arrow_path)
+
+ # clone crossbow repository
+ crossbow_url = 'https://github.com/{}'.format(crossbow_repo)
+ git.clone(crossbow_url, str(queue_path))
+
+ # initialize crossbow objects
+ github_token = os.environ['CROSSBOW_GITHUB_TOKEN']
+ arrow = Repo(arrow_path)
+ queue = Queue(queue_path, github_token=github_token, require_https=True)
+
+ return (arrow, queue)
+
+
@crossbow.command()
@click.argument('tasks', nargs=-1, required=False)
@click.option('--group', '-g', 'groups', multiple=True,
help='Submit task groups as defined in tests.yml')
@click.option('--param', '-p', 'params', multiple=True,
help='Additional task parameters for rendering the CI templates')
-@click.option('--dry-run/--push', default=False,
- help='Just display the new changelog, don\'t write it')
+@click.option('--arrow-version', '-v', default=None,
+ help='Set target version explicitly.')
@click.pass_obj
-def submit(obj, tasks, groups, params, dry_run):
- """Submit crossbow testing tasks.
+def submit(obj, tasks, groups, params, arrow_version):
+ """
+ Submit crossbow testing tasks.
See groups defined in arrow/dev/tasks/tests.yml
"""
- from ruamel.yaml import YAML
-
- git = Git()
-
- # construct crossbow arguments
- args = []
- if dry_run:
- args.append('--dry-run')
-
- for p in params:
- args.extend(['-p', p])
- for g in groups:
- args.extend(['-g', g])
- for t in tasks:
- args.append(t)
-
- # pygithub pull request object
- pr = obj['pull']
- crossbow_url = 'https://github.com/{}'.format(obj['crossbow_repo'])
-
+ crossbow_repo = obj['crossbow_repo']
+ pull_request = obj['pull_request']
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
- arrow = tmpdir / 'arrow'
- queue = tmpdir / 'crossbow'
-
- # clone arrow and checkout the pull request's branch
- git.clone(pr.base.repo.clone_url, str(arrow))
- git.fetch('origin', 'pull/{}/head:{}'.format(pr.number, pr.head.ref),
- git_dir=str(arrow))
- git.checkout(pr.head.ref, git_dir=str(arrow))
-
- # clone crossbow
- git.clone(crossbow_url, str(queue))
-
- # submit the crossbow tasks
- result = Path('result.yml').resolve()
- xbow = Crossbow(str(arrow / 'dev' / 'tasks' / 'crossbow.py'))
- xbow.run(
- '--queue-path', str(queue),
- '--output-file', str(result),
- 'submit',
- '--job-prefix', 'actions',
- # don't rely on crossbow's remote and branch detection, because
- # it doesn't work without a tracking upstream branch
- '--arrow-remote', pr.head.repo.clone_url,
- '--arrow-branch', pr.head.ref,
- *args
+ arrow, queue = _clone_arrow_and_crossbow(
+ dest=Path(tmpdir),
+ crossbow_repo=crossbow_repo,
+ pull_request=pull_request,
)
+ # load available tasks configuration and groups from yaml
+ config = Config.load_yaml(arrow.path / "dev" / "tasks" / "tasks.yml")
+ config.validate()
- # parse the result yml describing the submitted job
- yaml = YAML()
- with result.open() as fp:
- job = yaml.load(fp)
+ # initialize the crossbow build's target repository
+ target = Target.from_repo(arrow, version=arrow_version,
+ remote=pull_request.base.repo.clone_url)
- # render the response comment's content
- formatter = CrossbowCommentFormatter(obj['crossbow_repo'])
- response = formatter.render(job)
+ # parse additional job parameters
+ params = dict([p.split("=") for p in params])
- # send the response
- pr.create_issue_comment(response)
+ # instantiate the job object
+ job = Job.from_config(config=config, target=target, tasks=tasks,
+ groups=groups, params=params)
+
+ # add the job to the crossbow queue and push to the remote repository
+ queue.put(job, prefix="actions")
+ queue.push()
+
+ # render the response comment's content
+ report = CommentReport(job, crossbow_repo=crossbow_repo)
+
+ # send the response
+ pull_request.create_issue_comment(report.show())
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 74e2373..bcaddf1 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -1069,5 +1069,23 @@
click.echo('git cherry-pick {}'.format(commit.hexsha))
+try:
+ from .crossbow.cli import crossbow # noqa
+except ImportError as exc:
+ missing_package = exc.name
+
+ @archery.command(
+ 'crossbow',
+ context_settings={"ignore_unknown_options": True}
+ )
+ def crossbow():
+ raise click.ClickException(
+ "Couldn't import crossbow because of missing dependency: {}"
+ .format(missing_package)
+ )
+else:
+ archery.add_command(crossbow)
+
+
if __name__ == "__main__":
archery(obj={})
diff --git a/dev/archery/archery/utils/crossbow.py b/dev/archery/archery/crossbow/__init__.py
similarity index 80%
rename from dev/archery/archery/utils/crossbow.py
rename to dev/archery/archery/crossbow/__init__.py
index f28b976..bc72e81 100644
--- a/dev/archery/archery/utils/crossbow.py
+++ b/dev/archery/archery/crossbow/__init__.py
@@ -15,9 +15,5 @@
# specific language governing permissions and limitations
# under the License.
-from .command import Command, default_bin
-
-
-class Crossbow(Command):
- def __init__(self, crossbow_bin=None):
- self.bin = default_bin(crossbow_bin, "arrow/dev/tasks/crossbow.py")
+from .core import Config, Repo, Queue, Target, Job # noqa
+from .reports import CommentReport, ConsoleReport, EmailReport # noqa
diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py
new file mode 100644
index 0000000..71c25e0
--- /dev/null
+++ b/dev/archery/archery/crossbow/cli.py
@@ -0,0 +1,352 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pathlib import Path
+
+import click
+
+from .core import Config, Repo, Queue, Target, Job, CrossbowError
+from .reports import EmailReport, ConsoleReport
+from ..utils.source import ArrowSources
+
+
+_default_arrow_path = ArrowSources.find().path
+_default_queue_path = _default_arrow_path.parent / "crossbow"
+_default_config_path = _default_arrow_path / "dev" / "tasks" / "tasks.yml"
+
+
+@click.group()
+@click.option('--github-token', '-t', default=None,
+ envvar="CROSSBOW_GITHUB_TOKEN",
+ help='OAuth token for GitHub authentication')
+@click.option('--arrow-path', '-a',
+ type=click.Path(), default=_default_arrow_path,
+ help='Arrow\'s repository path. Defaults to the repository of '
+ 'this script')
+@click.option('--queue-path', '-q',
+ type=click.Path(), default=_default_queue_path,
+ help='The repository path used for scheduling the tasks. '
+ 'Defaults to crossbow directory placed next to arrow')
+@click.option('--queue-remote', '-qr', default=None,
+ help='Force to use this remote URL for the Queue repository')
+@click.option('--output-file', metavar='<output>',
+ type=click.File('w', encoding='utf8'), default='-',
+ help='Capture output result into file.')
+@click.pass_context
+def crossbow(ctx, github_token, arrow_path, queue_path, queue_remote,
+ output_file):
+ """
+ Schedule packaging tasks or nightly builds on CI services.
+ """
+ ctx.ensure_object(dict)
+ ctx.obj['output'] = output_file
+ ctx.obj['arrow'] = Repo(arrow_path)
+ ctx.obj['queue'] = Queue(queue_path, remote_url=queue_remote,
+ github_token=github_token, require_https=True)
+
+
+@crossbow.command()
+@click.option('--config-path', '-c',
+ type=click.Path(exists=True), default=_default_config_path,
+ help='Task configuration yml. Defaults to tasks.yml')
+@click.pass_obj
+def check_config(obj, config_path):
+ # load available tasks configuration and groups from yaml
+ config = Config.load_yaml(config_path)
+ config.validate()
+
+ output = obj['output']
+ config.show(output)
+
+
+@crossbow.command()
+@click.argument('tasks', nargs=-1, required=False)
+@click.option('--group', '-g', 'groups', multiple=True,
+ help='Submit task groups as defined in tasks.yml')
+@click.option('--param', '-p', 'params', multiple=True,
+ help='Additional task parameters for rendering the CI templates')
+@click.option('--job-prefix', default='build',
+ help='Arbitrary prefix for branch names, e.g. nightly')
+@click.option('--config-path', '-c',
+ type=click.Path(exists=True), default=_default_config_path,
+ help='Task configuration yml. Defaults to tasks.yml')
+@click.option('--arrow-version', '-v', default=None,
+ help='Set target version explicitly.')
+@click.option('--arrow-remote', '-r', default=None,
+ help='Set GitHub remote explicitly, which is going to be cloned '
+ 'on the CI services. Note, that no validation happens '
+ 'locally. Examples: https://github.com/apache/arrow or '
+ 'https://github.com/kszucs/arrow.')
+@click.option('--arrow-branch', '-b', default=None,
+ help='Give the branch name explicitly, e.g. master, ARROW-1949.')
+@click.option('--arrow-sha', '-t', default=None,
+ help='Set commit SHA or Tag name explicitly, e.g. f67a515, '
+ 'apache-arrow-0.11.1.')
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.option('--dry-run/--commit', default=False,
+ help='Just display the rendered CI configurations without '
+ 'committing them')
+@click.option('--no-push/--push', default=False,
+ help='Don\'t push the changes')
+@click.pass_obj
+def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version,
+ arrow_remote, arrow_branch, arrow_sha, fetch, dry_run, no_push):
+ output = obj['output']
+ queue, arrow = obj['queue'], obj['arrow']
+
+ # load available tasks configuration and groups from yaml
+ config = Config.load_yaml(config_path)
+ try:
+ config.validate()
+ except CrossbowError as e:
+ raise click.ClickException(str(e))
+
+ # Override the detected repo url / remote, branch and sha - this aims to
+ # make release procedure a bit simpler.
+ # Note that the target revision's crossbow templates must be
+ # compatible with the locally checked out version of crossbow (which is
+ # the case during the release procedure), because the templates still
+ # contain some business logic (dependency installation, deployments)
+ # which will be reduced to a single command in the future.
+ target = Target.from_repo(arrow, remote=arrow_remote, branch=arrow_branch,
+ head=arrow_sha, version=arrow_version)
+
+ # parse additional job parameters
+ params = dict([p.split("=") for p in params])
+
+ # instantiate the job object
+ try:
+ job = Job.from_config(config=config, target=target, tasks=tasks,
+ groups=groups, params=params)
+ except CrossbowError as e:
+ raise click.ClickException(str(e))
+
+ job.show(output)
+ if dry_run:
+ return
+
+ if fetch:
+ queue.fetch()
+ queue.put(job, prefix=job_prefix)
+
+ if no_push:
+ click.echo('Branches and commits created but not pushed: `{}`'
+ .format(job.branch))
+ else:
+ queue.push()
+ click.echo('Pushed job identifier is: `{}`'.format(job.branch))
+
+
+@crossbow.command()
+@click.argument('task', required=True)
+@click.option('--config-path', '-c',
+ type=click.Path(exists=True), default=_default_config_path,
+ help='Task configuration yml. Defaults to tasks.yml')
+@click.option('--arrow-version', '-v', default=None,
+ help='Set target version explicitly.')
+@click.option('--arrow-remote', '-r', default=None,
+ help='Set GitHub remote explicitly, which is going to be cloned '
+ 'on the CI services. Note, that no validation happens '
+ 'locally. Examples: https://github.com/apache/arrow or '
+ 'https://github.com/kszucs/arrow.')
+@click.option('--arrow-branch', '-b', default=None,
+ help='Give the branch name explicitly, e.g. master, ARROW-1949.')
+@click.option('--arrow-sha', '-t', default=None,
+ help='Set commit SHA or Tag name explicitly, e.g. f67a515, '
+ 'apache-arrow-0.11.1.')
+@click.option('--param', '-p', 'params', multiple=True,
+ help='Additional task parameters for rendering the CI templates')
+@click.pass_obj
+def render(obj, task, config_path, arrow_version, arrow_remote, arrow_branch,
+ arrow_sha, params):
+ """
+ Utility command to check the rendered CI templates.
+ """
+ from .core import _flatten
+
+ def highlight(code):
+ try:
+ from pygments import highlight
+ from pygments.lexers import YamlLexer
+ from pygments.formatters import TerminalFormatter
+ return highlight(code, YamlLexer(), TerminalFormatter())
+ except ImportError:
+ return code
+
+ arrow = obj['arrow']
+
+ target = Target.from_repo(arrow, remote=arrow_remote, branch=arrow_branch,
+ head=arrow_sha, version=arrow_version)
+ config = Config.load_yaml(config_path)
+ params = dict([p.split("=") for p in params])
+ job = Job.from_config(config=config, target=target, tasks=[task],
+ params=params)
+
+ for task_name, rendered_files in job.render_tasks().items():
+ for path, content in _flatten(rendered_files).items():
+ click.echo('#' * 80)
+ click.echo('### {:^72} ###'.format("/".join(path)))
+ click.echo('#' * 80)
+ click.echo(highlight(content))
+
+
+@crossbow.command()
+@click.argument('job-name', required=True)
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.pass_obj
+def status(obj, job_name, fetch):
+ output = obj['output']
+ queue = obj['queue']
+ if fetch:
+ queue.fetch()
+ job = queue.get(job_name)
+ ConsoleReport(job).show(output)
+
+
+@crossbow.command()
+@click.argument('prefix', required=True)
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.pass_obj
+def latest_prefix(obj, prefix, fetch):
+ queue = obj['queue']
+ if fetch:
+ queue.fetch()
+ latest = queue.latest_for_prefix(prefix)
+ click.echo(latest.branch)
+
+
+@crossbow.command()
+@click.argument('job-name', required=True)
+@click.option('--sender-name', '-n',
+ help='Name to use for report e-mail.')
+@click.option('--sender-email', '-e',
+ help='E-mail to use for report e-mail.')
+@click.option('--recipient-email', '-r',
+ help='Where to send the e-mail report')
+@click.option('--smtp-user', '-u',
+ help='E-mail address to use for SMTP login')
+@click.option('--smtp-password', '-P',
+ help='SMTP password to use for report e-mail.')
+@click.option('--smtp-server', '-s', default='smtp.gmail.com',
+ help='SMTP server to use for report e-mail.')
+@click.option('--smtp-port', '-p', default=465,
+ help='SMTP port to use for report e-mail.')
+@click.option('--poll/--no-poll', default=False,
+ help='Wait for completion if there are tasks pending')
+@click.option('--poll-max-minutes', default=180,
+ help='Maximum amount of time waiting for job completion')
+@click.option('--poll-interval-minutes', default=10,
+ help='Number of minutes to wait to check job status again')
+@click.option('--send/--dry-run', default=False,
+ help='Just display the report, don\'t send it')
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.pass_obj
+def report(obj, job_name, sender_name, sender_email, recipient_email,
+ smtp_user, smtp_password, smtp_server, smtp_port, poll,
+ poll_max_minutes, poll_interval_minutes, send, fetch):
+ """
+ Send an e-mail report showing success/failure of tasks in a Crossbow run
+ """
+ output = obj['output']
+ queue = obj['queue']
+ if fetch:
+ queue.fetch()
+
+ job = queue.get(job_name)
+ report = EmailReport(
+ job=job,
+ sender_name=sender_name,
+ sender_email=sender_email,
+ recipient_email=recipient_email
+ )
+
+ if poll:
+ job.wait_until_finished(
+ poll_max_minutes=poll_max_minutes,
+ poll_interval_minutes=poll_interval_minutes
+ )
+
+ if send:
+ report.send(
+ smtp_user=smtp_user,
+ smtp_password=smtp_password,
+ smtp_server=smtp_server,
+ smtp_port=smtp_port
+ )
+ else:
+ report.show(output)
+
+
+@crossbow.command()
+@click.argument('job-name', required=True)
+@click.option('-t', '--target-dir',
+ default=_default_arrow_path / 'packages',
+ type=click.Path(file_okay=False, dir_okay=True),
+ help='Directory to download the build artifacts')
+@click.option('--dry-run/--execute', default=False,
+ help='Just display process, don\'t download anything')
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.pass_obj
+def download_artifacts(obj, job_name, target_dir, dry_run, fetch):
+ """Download build artifacts from GitHub releases"""
+ output = obj['output']
+
+ # fetch the queue repository
+ queue = obj['queue']
+ if fetch:
+ queue.fetch()
+
+ # query the job's artifacts
+ job = queue.get(job_name)
+
+ # create directory to download the assets to
+ target_dir = Path(target_dir).absolute() / job_name
+ target_dir.mkdir(parents=True, exist_ok=True)
+
+ # download the assets while showing the job status
+ def asset_callback(task_name, task, asset):
+ if asset is not None:
+ path = target_dir / task_name / asset.name
+ path.parent.mkdir(exist_ok=True)
+ if not dry_run:
+ asset.download(path)
+
+ click.echo('Downloading {}\'s artifacts.'.format(job_name))
+ click.echo('Destination directory is {}'.format(target_dir))
+ click.echo()
+
+ report = ConsoleReport(job)
+ report.show(output, asset_callback=asset_callback)
+
+
+@crossbow.command()
+@click.option('--sha', required=True, help='Target committish')
+@click.option('--tag', required=True, help='Target tag')
+@click.option('--method', default='curl', help='Use cURL to upload')
+@click.option('--pattern', '-p', 'patterns', required=True, multiple=True,
+ help='File pattern to upload as assets')
+@click.pass_obj
+def upload_artifacts(obj, tag, sha, patterns, method):
+ queue = obj['queue']
+ queue.github_overwrite_release_assets(
+ tag_name=tag, target_commitish=sha, method=method, patterns=patterns
+ )
diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py
new file mode 100644
index 0000000..d4d3d51
--- /dev/null
+++ b/dev/archery/archery/crossbow/core.py
@@ -0,0 +1,1161 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import re
+import fnmatch
+import glob
+import time
+import logging
+import mimetypes
+import subprocess
+import textwrap
+from io import StringIO
+from pathlib import Path
+from datetime import date
+
+import jinja2
+from ruamel.yaml import YAML
+
+try:
+ import github3
+ _have_github3 = True
+except ImportError:
+ github3 = object
+ _have_github3 = False
+
+try:
+ import pygit2
+except ImportError:
+ PygitRemoteCallbacks = object
+else:
+ PygitRemoteCallbacks = pygit2.RemoteCallbacks
+
+from ..utils.source import ArrowSources
+
+
+for pkg in ["requests", "urllib3", "github3"]:
+ logging.getLogger(pkg).setLevel(logging.WARNING)
+
+logger = logging.getLogger("crossbow")
+
+
+class CrossbowError(Exception):
+ pass
+
+
+def _flatten(mapping):
+ """Converts a hierarchical mapping to a flat dictionary"""
+ result = {}
+ for k, v in mapping.items():
+ if isinstance(v, dict):
+ for ik, iv in _flatten(v).items():
+ ik = ik if isinstance(ik, tuple) else (ik,)
+ result[(k,) + ik] = iv
+ elif isinstance(v, list):
+ for ik, iv in enumerate(_flatten(v)):
+ ik = ik if isinstance(ik, tuple) else (ik,)
+ result[(k,) + ik] = iv
+ else:
+ result[(k,)] = v
+ return result
+
+
+def _unflatten(mapping):
+ """Converts a flat tuple => object mapping to hierarchical one"""
+ result = {}
+ for path, value in mapping.items():
+ parents, leaf = path[:-1], path[-1]
+ # create the hierarchy until we reach the leaf value
+ temp = result
+ for parent in parents:
+ temp.setdefault(parent, {})
+ temp = temp[parent]
+ # set the leaf value
+ temp[leaf] = value
+
+ return result
+
+
+def _unflatten_tree(files):
+ """Converts a flat path => object mapping to a hierarchical directories
+
+ Input:
+ {
+ 'path/to/file.a': a_content,
+ 'path/to/file.b': b_content,
+ 'path/file.c': c_content
+ }
+ Output:
+ {
+ 'path': {
+ 'to': {
+ 'file.a': a_content,
+ 'file.b': b_content
+ },
+ 'file.c': c_content
+ }
+ }
+ """
+ files = {tuple(k.split('/')): v for k, v in files.items()}
+ return _unflatten(files)
+
+
+def _render_jinja_template(searchpath, template, params):
+ def format_all(items, pattern):
+ return [pattern.format(item) for item in items]
+
+ loader = jinja2.FileSystemLoader(searchpath)
+ env = jinja2.Environment(loader=loader, trim_blocks=True,
+ lstrip_blocks=True)
+ env.filters['format_all'] = format_all
+ template = env.get_template(template)
+ return template.render(**params)
+
+
+# configurations for setting up branch skipping
+# - appveyor has a feature to skip builds without an appveyor.yml
+# - travis reads from the master branch and applies the rules
+# - circle requires the configuration to be present on all branches, even ones
+# that are configured to be skipped
+# - azure skips branches without azure-pipelines.yml by default
+# - github skips branches without .github/workflows/ by default
+
+_default_travis_yml = """
+branches:
+ only:
+ - master
+ - /.*-travis-.*/
+
+os: linux
+dist: trusty
+language: generic
+"""
+
+_default_circle_yml = """
+version: 2
+
+jobs:
+ build:
+ machine: true
+
+workflows:
+ version: 2
+ build:
+ jobs:
+ - build:
+ filters:
+ branches:
+ only:
+ - /.*-circle-.*/
+"""
+
+_default_tree = {
+ '.travis.yml': _default_travis_yml,
+ '.circleci/config.yml': _default_circle_yml
+}
+
+
+class GitRemoteCallbacks(PygitRemoteCallbacks):
+
+ def __init__(self, token):
+ self.token = token
+ self.attempts = 0
+ super().__init__()
+
+ def push_update_reference(self, refname, message):
+ pass
+
+ def update_tips(self, refname, old, new):
+ pass
+
+ def credentials(self, url, username_from_url, allowed_types):
+ # it's a libgit2 bug that it infinitely retries the authentication
+ self.attempts += 1
+
+ if self.attempts >= 5:
+ # pygit2 doesn't propagate the exception properly
+ msg = 'Wrong oauth personal access token'
+ print(msg)
+ raise CrossbowError(msg)
+
+ if allowed_types & pygit2.credentials.GIT_CREDTYPE_USERPASS_PLAINTEXT:
+ return pygit2.UserPass(self.token, 'x-oauth-basic')
+ else:
+ return None
+
+
+def _git_ssh_to_https(url):
+ return url.replace('git@github.com:', 'https://github.com/')
+
+
+class Repo:
+ """
+ Base class for interaction with local git repositories
+
+ A high level wrapper used for both reading revision information from
+ arrow's repository and pushing continuous integration tasks to the queue
+ repository.
+
+ Parameters
+ ----------
+ require_https : boolean, default False
+ Raise exception for SSH origin URLs
+ """
+
+ def __init__(self, path, github_token=None, remote_url=None,
+ require_https=False):
+ self.path = Path(path)
+ self.github_token = github_token
+ self.require_https = require_https
+ self._remote_url = remote_url
+ self._pygit_repo = None
+ self._github_repo = None # set by as_github_repo()
+ self._updated_refs = []
+
+ def __str__(self):
+ tpl = textwrap.dedent('''
+ Repo: {remote}@{branch}
+ Commit: {head}
+ ''')
+ return tpl.format(
+ remote=self.remote_url,
+ branch=self.branch.branch_name,
+ head=self.head
+ )
+
+ @property
+ def repo(self):
+ if self._pygit_repo is None:
+ self._pygit_repo = pygit2.Repository(str(self.path))
+ return self._pygit_repo
+
+ @property
+ def origin(self):
+ remote = self.repo.remotes['origin']
+ if self.require_https and remote.url.startswith('git@github.com'):
+ raise CrossbowError("Change SSH origin URL to HTTPS to use "
+ "Crossbow: {}".format(remote.url))
+ return remote
+
+ def fetch(self):
+ refspec = '+refs/heads/*:refs/remotes/origin/*'
+ self.origin.fetch([refspec])
+
+ def push(self, refs=None, github_token=None):
+ github_token = github_token or self.github_token
+ if github_token is None:
+ raise RuntimeError(
+ 'Could not determine GitHub token. Please set the '
+ 'CROSSBOW_GITHUB_TOKEN environment variable to a '
+ 'valid GitHub access token or pass one to --github-token.'
+ )
+ callbacks = GitRemoteCallbacks(github_token)
+ refs = refs or []
+ try:
+ self.origin.push(refs + self._updated_refs, callbacks=callbacks)
+ except pygit2.GitError:
+ raise RuntimeError('Failed to push updated references, '
+ 'potentially because of credential issues: {}'
+ .format(self._updated_refs))
+ else:
+ self.updated_refs = []
+
+ @property
+ def head(self):
+ """Currently checked out commit's sha"""
+ return self.repo.head
+
+ @property
+ def branch(self):
+ """Currently checked out branch"""
+ try:
+ return self.repo.branches[self.repo.head.shorthand]
+ except KeyError:
+ return None # detached
+
+ @property
+ def remote(self):
+ """Currently checked out branch's remote counterpart"""
+ try:
+ return self.repo.remotes[self.branch.upstream.remote_name]
+ except (AttributeError, KeyError):
+ return None # cannot detect
+
+ @property
+ def remote_url(self):
+ """Currently checked out branch's remote counterpart URL
+
+ If an SSH github url is set, it will be replaced by the https
+ equivalent usable with GitHub OAuth token.
+ """
+ try:
+ return self._remote_url or _git_ssh_to_https(self.remote.url)
+ except AttributeError:
+ return None
+
+ @property
+ def user_name(self):
+ try:
+ return next(self.repo.config.get_multivar('user.name'))
+ except StopIteration:
+ return os.environ.get('GIT_COMMITTER_NAME', 'unknown')
+
+ @property
+ def user_email(self):
+ try:
+ return next(self.repo.config.get_multivar('user.email'))
+ except StopIteration:
+ return os.environ.get('GIT_COMMITTER_EMAIL', 'unknown')
+
+ @property
+ def signature(self):
+ return pygit2.Signature(self.user_name, self.user_email,
+ int(time.time()))
+
    def create_tree(self, files):
        """Recursively write a git tree object from a nested mapping.

        Parameters
        ----------
        files : dict
            Mapping of file name to content; a dict value is treated as a
            subdirectory and written as a nested tree.

        Returns
        -------
        pygit2.Oid
            Object id of the newly written tree.
        """
        builder = self.repo.TreeBuilder()

        for filename, content in files.items():
            if isinstance(content, dict):
                # create a subtree
                tree_id = self.create_tree(content)
                builder.insert(filename, tree_id, pygit2.GIT_FILEMODE_TREE)
            else:
                # create a file
                blob_id = self.repo.create_blob(content)
                builder.insert(filename, blob_id, pygit2.GIT_FILEMODE_BLOB)

        tree_id = builder.write()
        return tree_id
+
    def create_commit(self, files, parents=None, message='',
                      reference_name=None):
        """Create a commit whose tree is rendered from ``files``.

        Parameters
        ----------
        files : dict
            Nested mapping of file names to contents, see ``create_tree``.
        parents : list, default None
            Parent commit ids; defaults to the current master head.
        message : str, default ''
            Commit message.
        reference_name : str, default None
            Reference to update with the new commit, if any.

        Returns
        -------
        pygit2.Commit
        """
        if parents is None:
            # by default use the main branch as the base of the new branch
            # required to reuse github actions cache across crossbow tasks
            commit, _ = self.repo.resolve_refish("master")
            parents = [commit.id]
        tree_id = self.create_tree(files)

        author = committer = self.signature
        commit_id = self.repo.create_commit(reference_name, author, committer,
                                            message, tree_id, parents)
        return self.repo[commit_id]
+
+ def create_branch(self, branch_name, files, parents=None, message='',
+ signature=None):
+ # create commit with the passed tree
+ commit = self.create_commit(files, parents=parents, message=message)
+
+ # create branch pointing to the previously created commit
+ branch = self.repo.create_branch(branch_name, commit)
+
+ # append to the pushable references
+ self._updated_refs.append('refs/heads/{}'.format(branch_name))
+
+ return branch
+
+ def create_tag(self, tag_name, commit_id, message=''):
+ tag_id = self.repo.create_tag(tag_name, commit_id,
+ pygit2.GIT_OBJ_COMMIT, self.signature,
+ message)
+
+ # append to the pushable references
+ self._updated_refs.append('refs/tags/{}'.format(tag_name))
+
+ return self.repo[tag_id]
+
+ def file_contents(self, commit_id, file):
+ commit = self.repo[commit_id]
+ entry = commit.tree[file]
+ blob = self.repo[entry.id]
+ return blob.data
+
+ def _parse_github_user_repo(self):
+ m = re.match(r'.*\/([^\/]+)\/([^\/\.]+)(\.git)?$', self.remote_url)
+ if m is None:
+ raise CrossbowError(
+ "Unable to parse the github owner and repository from the "
+ "repository's remote url '{}'".format(self.remote_url)
+ )
+ user, repo = m.group(1), m.group(2)
+ return user, repo
+
    def as_github_repo(self, github_token=None):
        """Converts it to a repository object which wraps the GitHub API.

        The wrapper is constructed lazily and cached on the instance, so
        the token is only consulted on the first call.
        """
        if self._github_repo is None:
            if not _have_github3:
                raise ImportError('Must install github3.py')
            github_token = github_token or self.github_token
            username, reponame = self._parse_github_user_repo()
            # moderate timeouts so transient API hiccups don't hang clients
            session = github3.session.GitHubSession(
                default_connect_timeout=10,
                default_read_timeout=30
            )
            github = github3.GitHub(session=session)
            github.login(token=github_token)
            self._github_repo = github.repository(username, reponame)
        return self._github_repo
+
+ def github_commit(self, sha):
+ repo = self.as_github_repo()
+ return repo.commit(sha)
+
+ def github_release(self, tag):
+ repo = self.as_github_repo()
+ try:
+ return repo.release_from_tag(tag)
+ except github3.exceptions.NotFoundError:
+ return None
+
    def github_upload_asset_requests(self, release, path, name, mime,
                                     max_retries=None, retry_backoff=None):
        """Upload a release asset through github3, retrying on failure.

        Parameters
        ----------
        release : github3 release object the asset is attached to.
        path : local path of the file to upload.
        name : asset name to use on the release.
        mime : content type of the asset.
        max_retries : int, default None
            Number of attempts; falls back to $CROSSBOW_MAX_RETRIES or 8.
        retry_backoff : int, default None
            Seconds to sleep between attempts; falls back to
            $CROSSBOW_RETRY_BACKOFF or 5.

        Raises
        ------
        RuntimeError
            If every attempt failed.
        """
        if max_retries is None:
            max_retries = int(os.environ.get('CROSSBOW_MAX_RETRIES', 8))
        if retry_backoff is None:
            retry_backoff = int(os.environ.get('CROSSBOW_RETRY_BACKOFF', 5))

        for i in range(max_retries):
            try:
                with open(path, 'rb') as fp:
                    result = release.upload_asset(name=name, asset=fp,
                                                  content_type=mime)
            except github3.exceptions.ResponseError as e:
                logger.error('Attempt {} has failed with message: {}.'
                             .format(i + 1, str(e)))
                logger.error('Error message {}'.format(e.msg))
                logger.error('List of errors provided by Github:')
                for err in e.errors:
                    logger.error(' - {}'.format(err))

                if e.code == 422:
                    # 422 Validation Failed, probably raised because
                    # ReleaseAsset already exists, so try to remove it before
                    # reattempting the asset upload
                    for asset in release.assets():
                        if asset.name == name:
                            logger.info('Release asset {} already exists, '
                                        'removing it...'.format(name))
                            asset.delete()
                            logger.info('Asset {} removed.'.format(name))
                            break
            except github3.exceptions.ConnectionError as e:
                logger.error('Attempt {} has failed with message: {}.'
                             .format(i + 1, str(e)))
            else:
                # upload succeeded, return the created ReleaseAsset
                logger.info('Attempt {} has finished.'.format(i + 1))
                return result

            # wait before the next retry
            time.sleep(retry_backoff)

        raise RuntimeError('Github asset uploading has failed!')
+
    def github_upload_asset_curl(self, release, path, name, mime):
        """Upload a release asset by shelling out to cURL.

        Used as a more reliable alternative to the github3 based upload,
        see ``github_overwrite_release_assets``.
        """
        # the upload_url is a URI template like ".../assets{?name,label}";
        # keep only the base and append the asset name explicitly
        upload_url, _ = release.upload_url.split('{?')
        upload_url += '?name={}'.format(name)

        # shell=False with an argument list avoids shell injection via paths
        command = [
            'curl',
            '--fail',
            '-H', "Authorization: token {}".format(self.github_token),
            '-H', "Content-Type: {}".format(mime),
            '--data-binary', '@{}'.format(path),
            upload_url
        ]
        return subprocess.run(command, shell=False, check=True)
+
    def github_overwrite_release_assets(self, tag_name, target_commitish,
                                        patterns, method='requests'):
        """Recreate the release for ``tag_name`` and upload its assets.

        Any pre-existing release for the tag is deleted first, then every
        local file matching ``patterns`` (glob) is uploaded using the
        chosen ``method`` ('requests' or 'curl').
        """
        # Since github has changed something the asset uploading via requests
        # became unstable, so prefer the cURL alternative.
        # Potential cause:
        #   sigmavirus24/github3.py/issues/779#issuecomment-379470626
        repo = self.as_github_repo()
        if not tag_name:
            raise CrossbowError('Empty tag name')
        if not target_commitish:
            raise CrossbowError('Empty target commit for the release tag')

        # remove the whole release if it already exists
        try:
            release = repo.release_from_tag(tag_name)
        except github3.exceptions.NotFoundError:
            pass
        else:
            release.delete()

        release = repo.create_release(tag_name, target_commitish)
        for pattern in patterns:
            for path in glob.glob(pattern, recursive=True):
                name = os.path.basename(path)
                size = os.path.getsize(path)
                mime = mimetypes.guess_type(name)[0] or 'application/zip'

                logger.info(
                    'Uploading asset `{}` with mimetype {} and size {}...'
                    .format(name, mime, size)
                )

                if method == 'requests':
                    self.github_upload_asset_requests(release, path, name=name,
                                                      mime=mime)
                elif method == 'curl':
                    self.github_upload_asset_curl(release, path, name=name,
                                                  mime=mime)
                else:
                    raise CrossbowError(
                        'Unsupported upload method {}'.format(method)
                    )
+
+
class Queue(Repo):
    """A git repository used as a job queue.

    Jobs and their tasks are stored as branches named ``<prefix>-<id>``;
    each job's metadata is serialized into a ``job.yml`` file committed on
    the job's branch.
    """

    def _latest_prefix_id(self, prefix):
        """Return the largest numeric suffix used with ``prefix``, or -1."""
        pattern = re.compile(r'[\w\/-]*{}-(\d+)'.format(prefix))
        matches = list(filter(None, map(pattern.match, self.repo.branches)))
        if matches:
            latest = max(int(m.group(1)) for m in matches)
        else:
            latest = -1
        return latest

    def _next_job_id(self, prefix):
        """Auto increments the branch's identifier based on the prefix"""
        latest_id = self._latest_prefix_id(prefix)
        return '{}-{}'.format(prefix, latest_id + 1)

    def latest_for_prefix(self, prefix):
        """Return the most recently submitted job for ``prefix``."""
        latest_id = self._latest_prefix_id(prefix)
        if latest_id < 0:
            raise RuntimeError(
                'No job has been submitted with prefix {} yet'.format(prefix)
            )
        job_name = '{}-{}'.format(prefix, latest_id)
        return self.get(job_name)

    def date_of(self, job):
        """Return the date the job's branch was committed."""
        # it'd be better to bound to the queue repository on deserialization
        # and reorganize these methods to Job
        branch_name = 'origin/{}'.format(job.branch)
        branch = self.repo.branches[branch_name]
        commit = self.repo[branch.target]
        return date.fromtimestamp(commit.commit_time)

    def jobs(self, pattern):
        """Return jobs sorted by its identifier in reverse order"""
        job_names = []
        for name in self.repo.branches.remote:
            # remote branch names look like 'origin/<job-name>'
            origin, name = name.split('/', 1)
            result = re.match(pattern, name)
            if result:
                job_names.append(name)

        for name in sorted(job_names, reverse=True):
            yield self.get(name)

    def get(self, job_name):
        """Deserialize the job stored on branch ``job_name``."""
        branch_name = 'origin/{}'.format(job_name)
        branch = self.repo.branches[branch_name]
        try:
            content = self.file_contents(branch.target, 'job.yml')
        except KeyError:
            raise CrossbowError(
                'No job is found with name: {}'.format(job_name)
            )

        buffer = StringIO(content.decode('utf-8'))
        job = yaml.load(buffer)
        # bind the deserialized job (and its tasks) back to this queue
        job.queue = self
        return job

    def put(self, job, prefix='build'):
        """Render and push a new job under an auto-incremented identifier.

        Creates one branch (and tag) per task with the rendered CI
        configuration, plus one branch for the job description itself.
        """
        if not isinstance(job, Job):
            raise CrossbowError('`job` must be an instance of Job')
        if job.branch is not None:
            raise CrossbowError('`job.branch` is automatically generated, '
                                'thus it must be blank')

        if job.target.remote is None:
            raise CrossbowError(
                'Cannot determine git remote for the Arrow repository to '
                'clone or push to, try to push the `{}` branch first to have '
                'a remote tracking counterpart.'.format(job.target.branch)
            )
        if job.target.branch is None:
            raise CrossbowError(
                'Cannot determine the current branch of the Arrow repository '
                'to clone or push to, perhaps it is in detached HEAD state. '
                'Please checkout a branch.'
            )

        # auto increment and set next job id, e.g. build-85
        job._queue = self
        job.branch = self._next_job_id(prefix)

        # create tasks' branches
        for task_name, task in job.tasks.items():
            # adding CI's name to the end of the branch in order to use skip
            # patterns on travis and circleci
            task.branch = '{}-{}-{}'.format(job.branch, task.ci, task_name)
            params = {
                **job.params,
                "arrow": job.target,
                "queue_remote_url": self.remote_url
            }
            files = task.render_files(job.template_searchpath, params=params)
            branch = self.create_branch(task.branch, files=files)
            self.create_tag(task.tag, branch.target)
            task.commit = str(branch.target)

        # create job's branch with its description
        return self.create_branch(job.branch, files=job.render_files())
+
+
def get_version(root, **kwargs):
    """
    Parse function for setuptools_scm that ignores tags for non-C++
    subprojects, e.g. apache-arrow-js-XXX tags.

    Raises
    ------
    ValueError
        If the tag found by git describe is not a MAJOR.MINOR.PATCH version.
    """
    from setuptools_scm.git import parse as parse_git_version

    # query the calculated version based on the git tags
    kwargs['describe_command'] = (
        'git describe --dirty --tags --long --match "apache-arrow-[0-9].*"'
    )
    version = parse_git_version(root, **kwargs)
    tag = str(version.tag)

    # increment the minor version, because there can be patch releases created
    # from maintenance branches where the tags are unreachable from the
    # master's HEAD, so the git command above generates 0.17.0.dev300 even if
    # arrow has a newer 0.17.1 patch release
    match = re.match(r"^(\d+)\.(\d+)\.(\d+)$", tag)
    if match is None:
        # fail with an explicit error instead of an AttributeError on
        # `match.groups()` when the tag has an unexpected format
        raise ValueError(
            "Unable to parse version from tag {!r}, expected a "
            "MAJOR.MINOR.PATCH formatted tag".format(tag)
        )
    major, minor, patch = map(int, match.groups())

    # the bumped version number after 0.17.x will be 0.18.0.dev300
    return "{}.{}.{}.dev{}".format(major, minor + 1, patch, version.distance)
+
+
class Serializable:
    """Mixin adding a generic yaml representation hook.

    Instances are dumped as a mapping of their public attributes tagged
    with the class name, e.g. ``!Task``.
    """

    @classmethod
    def to_yaml(cls, representer, data):
        tag = '!{}'.format(cls.__name__)
        # private attributes (leading underscore) are not serialized
        public = {key: value for key, value in data.__dict__.items()
                  if not key.startswith('_')}
        return representer.represent_mapping(tag, public)
+
+
class Target(Serializable):
    """
    Describes the repository and revision that the builds run against.

    A serializable data container holding arrow's git remote, branch, head
    sha and version number, plus some metadata (currently only an email
    address where the notification should be sent).
    """

    def __init__(self, head, branch, remote, version, email=None):
        self.head = head
        self.email = email
        self.branch = branch
        self.remote = remote
        self.version = version
        # version with any release candidate suffix (e.g. "-rc0") removed
        self.no_rc_version = re.sub(r'-rc\d+\Z', '', version)
        # Semantic Versioning 1.0.0: https://semver.org/spec/v1.0.0.html
        #
        # > A pre-release version number MAY be denoted by appending an
        # > arbitrary string immediately following the patch version and a
        # > dash. The string MUST be comprised of only alphanumerics plus
        # > dash [0-9A-Za-z-].
        #
        # Example: '0.16.1.dev10' -> '0.16.1-dev10'
        self.no_rc_semver_version = re.sub(r'\.(dev\d+)\Z', r'-\1',
                                           self.no_rc_version)

    @classmethod
    def from_repo(cls, repo, head=None, branch=None, remote=None, version=None,
                  email=None):
        """Initialize from a repository

        Optionally override detected remote, branch, head, and/or version.
        """
        assert isinstance(repo, Repo)

        head = str(repo.head.target) if head is None else head
        branch = repo.branch.branch_name if branch is None else branch
        remote = repo.remote_url if remote is None else remote
        version = get_version(repo.path) if version is None else version
        email = repo.user_email if email is None else email

        return cls(head=head, email=email, branch=branch, remote=remote,
                   version=version)
+
+
class Task(Serializable):
    """
    Describes a build task and metadata required to render CI templates

    A task is represented as a single git commit and branch containing jinja2
    rendered files (currently appveyor.yml or .travis.yml configurations).

    A task can't be directly submitted to a queue, must belong to a job.
    Each task's unique identifier is its branch name, which is generated after
    submitting the job to a queue.
    """

    def __init__(self, ci, template, artifacts=None, params=None):
        valid_services = {'circle', 'travis', 'appveyor', 'azure', 'github',
                          'drone'}
        assert ci in valid_services
        self.ci = ci
        self.template = template
        self.artifacts = artifacts if artifacts else []
        self.params = params if params else {}
        self.branch = None   # filled after adding to a queue
        self.commit = None   # filled after adding to a queue
        self._queue = None   # set by the queue object after put or get
        self._status = None  # status cache
        self._assets = None  # assets cache

    def render_files(self, searchpath, params=None):
        """Render the CI template into the file tree of this task."""
        merged = dict(self.params)
        merged.update(params or {})
        merged["task"] = self
        try:
            rendered = _render_jinja_template(searchpath, self.template,
                                              params=merged)
        except jinja2.TemplateError as e:
            raise RuntimeError(
                'Failed to render template `{}` with {}: {}'.format(
                    self.template, e.__class__.__name__, str(e)
                )
            )

        tree = dict(_default_tree)
        tree[self.filename] = rendered
        return _unflatten_tree(tree)

    @property
    def tag(self):
        """The tag name coincides with the task's branch name."""
        return self.branch

    @property
    def filename(self):
        """Path of the rendered CI configuration file for the service."""
        config_files = {
            'circle': '.circleci/config.yml',
            'travis': '.travis.yml',
            'appveyor': 'appveyor.yml',
            'azure': 'azure-pipelines.yml',
            'github': '.github/workflows/crossbow.yml',
            'drone': '.drone.yml',
        }
        return config_files[self.ci]

    def status(self, force_query=False):
        """Return the cached TaskStatus, querying github when needed."""
        cached = getattr(self, '_status', None)
        if force_query or cached is None:
            github_commit = self._queue.github_commit(self.commit)
            self._status = TaskStatus(github_commit)
        return self._status

    def assets(self, force_query=False):
        """Return the cached TaskAssets, querying github when needed."""
        cached = getattr(self, '_assets', None)
        if force_query or cached is None:
            github_release = self._queue.github_release(self.tag)
            self._assets = TaskAssets(github_release,
                                      artifact_patterns=self.artifacts)
        return self._assets
+
+
class TaskStatus:
    """
    Combine the results from status and checks API to a single state.

    Azure pipelines uses checks API which doesn't provide a combined
    interface like status API does, so we need to manually combine
    both the commit statuses and the commit checks coming from
    different API endpoint

    Status.state: error, failure, pending or success, default pending
    CheckRun.status: queued, in_progress or completed, default: queued
    CheckRun.conclusion: success, failure, neutral, cancelled, timed_out
                         or action_required, only set if
                         CheckRun.status == 'completed'

    1. Convert CheckRun's status and conclusion to one of Status.state
    2. Merge the states based on the following rules:
       - failure if any of the contexts report as error or failure
       - pending if there are no statuses or a context is pending
       - success if the latest status for all contexts is success
       error otherwise.

    Parameters
    ----------
    commit : github3.Commit
        Commit to query the combined status for.
    """

    def __init__(self, commit):
        status = commit.status()
        check_runs = list(commit.check_runs())

        # translate every check run into a status-API style state
        states = [s.state for s in status.statuses]
        for check in check_runs:
            if check.status != 'completed':
                states.append('pending')
            elif check.conclusion in {'success', 'failure'}:
                states.append(check.conclusion)
            elif check.conclusion in {'cancelled', 'timed_out',
                                      'action_required'}:
                states.append('error')
            # `neutral` conclusions are deliberately omitted

        # it could be more effective, but the following is more descriptive
        combined_state = 'error'
        if states:
            if any(state in {'error', 'failure'} for state in states):
                combined_state = 'failure'
            elif any(state == 'pending' for state in states):
                combined_state = 'pending'
            elif all(state == 'success' for state in states):
                combined_state = 'success'

        # show link to the actual build, some of the CI providers implement
        # the statuses API others implement the checks API, so display both
        build_links = [s.target_url for s in status.statuses]
        build_links.extend(c.html_url for c in check_runs)

        self.combined_state = combined_state
        self.github_status = status
        self.github_check_runs = check_runs
        self.total_count = len(states)
        self.build_links = build_links
+
+
class TaskAssets(dict):
    """Mapping of artifact patterns to the matching uploaded release asset.

    A pattern maps to None when no asset has been uploaded for it yet.
    """

    def __init__(self, github_release, artifact_patterns):
        # HACK(kszucs): don't expect uploaded assets if no artifacts were
        # defined for the task in order to spare a bit of github rate limit
        if not artifact_patterns:
            return

        if github_release is None:
            github_assets = {}  # no assets have been uploaded for the task
        else:
            github_assets = {a.name: a for a in github_release.assets()}

        for pattern in artifact_patterns:
            # artifact can be a regex pattern
            compiled = re.compile(pattern)
            matches = list(
                filter(None, map(compiled.match, github_assets.keys()))
            )
            num_matches = len(matches)

            # validate artifact pattern matches single asset
            if num_matches == 0:
                self[pattern] = None
            elif num_matches == 1:
                self[pattern] = github_assets[matches[0].group(0)]
            else:
                # BUG FIX: the original joined the `re.Match` objects
                # directly which raised TypeError; join the matched asset
                # names instead
                raise CrossbowError(
                    'Only a single asset should match pattern `{}`, there '
                    'are multiple ones: {}'.format(
                        pattern, ', '.join(m.group(0) for m in matches)
                    )
                )

    def missing_patterns(self):
        """Patterns for which no asset has been uploaded."""
        return [pattern for pattern, asset in self.items() if asset is None]

    def uploaded_assets(self):
        """Assets which have been uploaded to the release."""
        return [asset for asset in self.values() if asset is not None]
+
+
class Job(Serializable):
    """Describes multiple tasks against a single target repository"""

    def __init__(self, target, tasks, params=None, template_searchpath=None):
        if not tasks:
            raise ValueError('no tasks were provided for the job')
        if not all(isinstance(task, Task) for task in tasks.values()):
            raise ValueError('each `tasks` must be an instance of Task')
        if not isinstance(target, Target):
            raise ValueError('`target` must be an instance of Target')
        # BUG FIX: the original unconditionally required a dict, which
        # rejected the documented default `params=None`; it also duplicated
        # the `target` isinstance check
        if params is not None and not isinstance(params, dict):
            raise ValueError('`params` must be an instance of dict')

        self.target = target
        self.tasks = tasks
        self.params = params or {}  # additional parameters for the tasks
        self.branch = None  # filled after adding to a queue
        self._queue = None  # set by the queue object after put or get
        if template_searchpath is None:
            self._template_searchpath = ArrowSources.find().path
        else:
            self._template_searchpath = template_searchpath

    def render_files(self):
        """Render the job's own file tree containing its description."""
        with StringIO() as buf:
            yaml.dump(self, buf)
            content = buf.getvalue()
        tree = {**_default_tree, "job.yml": content}
        return _unflatten_tree(tree)

    def render_tasks(self, params=None):
        """Render every task's file tree without submitting anything."""
        result = {}
        params = {
            **self.params,
            "arrow": self.target,
            **(params or {})
        }
        for task_name, task in self.tasks.items():
            files = task.render_files(self._template_searchpath, params)
            result[task_name] = files
        return result

    @property
    def template_searchpath(self):
        return self._template_searchpath

    @property
    def queue(self):
        assert isinstance(self._queue, Queue)
        return self._queue

    @queue.setter
    def queue(self, queue):
        assert isinstance(queue, Queue)
        self._queue = queue
        # propagate the queue to the tasks so they can query their status
        for task in self.tasks.values():
            task._queue = queue

    @property
    def email(self):
        """Notification email, overridable with $CROSSBOW_EMAIL."""
        return os.environ.get('CROSSBOW_EMAIL', self.target.email)

    @property
    def date(self):
        """Date the job was submitted to the queue."""
        return self.queue.date_of(self)

    def show(self, stream=None):
        """Dump the job description as yaml to ``stream``."""
        return yaml.dump(self, stream=stream)

    @classmethod
    def from_config(cls, config, target, tasks=None, groups=None, params=None):
        """
        Instantiate a job based on a config.

        Parameters
        ----------
        config : dict
            Deserialized content of tasks.yml
        target : Target
            Describes target repository and revision the builds run against.
        tasks : Optional[List[str]], default None
            List of glob patterns for matching task names.
        groups : Optional[List[str]], default None
            List of exact group names matching predefined task sets in the
            config.
        params : Optional[Dict[str, str]], default None
            Additional rendering parameters for the task templates.

        Returns
        -------
        Job

        Raises
        ------
        Exception:
            If invalid groups or tasks has been passed.
        """
        task_definitions = config.select(tasks, groups=groups)

        # instantiate the tasks
        tasks = {}
        versions = {'version': target.version,
                    'no_rc_version': target.no_rc_version,
                    'no_rc_semver_version': target.no_rc_semver_version}
        for task_name, task in task_definitions.items():
            artifacts = task.pop('artifacts', None) or []  # because of yaml
            artifacts = [fn.format(**versions) for fn in artifacts]
            tasks[task_name] = Task(artifacts=artifacts, **task)

        return cls(target=target, tasks=tasks, params=params,
                   template_searchpath=config.template_searchpath)

    def is_finished(self):
        """True when no task reports a pending combined state anymore."""
        for task in self.tasks.values():
            status = task.status(force_query=True)
            if status.combined_state == 'pending':
                return False
        return True

    def wait_until_finished(self, poll_max_minutes=120,
                            poll_interval_minutes=10):
        """Poll the tasks' states until all of them finish or we time out."""
        started_at = time.time()
        while True:
            if self.is_finished():
                break

            waited_for_minutes = (time.time() - started_at) / 60
            if waited_for_minutes > poll_max_minutes:
                msg = ('Exceeded the maximum amount of time waiting for job '
                       'to finish, waited for {} minutes.')
                raise RuntimeError(msg.format(waited_for_minutes))

            logger.info('Waiting {} minutes and then checking again'
                        .format(poll_interval_minutes))
            time.sleep(poll_interval_minutes * 60)
+
+
class Config(dict):
    """Deserialized tasks.yml holding the task and group definitions."""

    def __init__(self, tasks, template_searchpath):
        super().__init__(tasks)
        self.template_searchpath = template_searchpath

    @classmethod
    def load_yaml(cls, path):
        """Render the jinja templated yaml file, then deserialize it."""
        path = Path(path)
        searchpath = path.parent
        rendered = _render_jinja_template(searchpath, template=path.name,
                                          params={})
        return cls(yaml.load(rendered), template_searchpath=searchpath)

    def show(self, stream=None):
        """Dump the configuration as yaml to ``stream``."""
        return yaml.dump(dict(self), stream=stream)

    def select(self, tasks=None, groups=None):
        """Resolve group names and task glob patterns to task definitions.

        Raises CrossbowError for unknown groups or unmatched patterns.
        """
        config_groups = dict(self['groups'])
        config_tasks = dict(self['tasks'])
        valid_groups = set(config_groups.keys())
        valid_tasks = set(config_tasks.keys())

        # validate that the passed groups are defined in the config
        requested_groups = set(groups or [])
        invalid_groups = requested_groups - valid_groups
        if invalid_groups:
            raise CrossbowError(
                'Invalid group(s) {!r}. Must be one of {!r}'.format(
                    invalid_groups, valid_groups
                )
            )

        # merge the glob patterns of the selected groups with the ones
        # requested explicitly
        patterns = list(tasks or [])
        for group_name in requested_groups:
            patterns.extend(config_groups[group_name])

        # treat the task names as glob patterns to select tasks more easily
        requested_tasks = set()
        for pattern in set(patterns):
            matches = fnmatch.filter(valid_tasks, pattern)
            if matches:
                requested_tasks.update(matches)
            else:
                raise CrossbowError(
                    "Unable to match any tasks for `{}`".format(pattern)
                )

        # validate that the passed and matched tasks are defined in the config
        invalid_tasks = requested_tasks - valid_tasks
        if invalid_tasks:
            raise CrossbowError(
                'Invalid task(s) {!r}. Must be one of {!r}'.format(
                    invalid_tasks, valid_tasks
                )
            )

        return {name: config_tasks[name] for name in requested_tasks}

    def validate(self):
        """Sanity check the group references and task definitions."""
        # validate that the task groups are properly referencing the tasks
        for group_name, group in self['groups'].items():
            for pattern in group:
                tasks = self.select(tasks=[pattern])
                if not tasks:
                    raise CrossbowError(
                        "The pattern `{}` defined for task group `{}` is not "
                        "matching any of the tasks defined in the "
                        "configuration file.".format(pattern, group_name)
                    )

        # validate that the tasks are constructible
        for task_name, task in self['tasks'].items():
            try:
                Task(**task)
            except Exception as e:
                raise CrossbowError(
                    'Unable to construct a task object from the '
                    'definition of task `{}`. The original error message '
                    'is: `{}`'.format(task_name, str(e))
                )

        # validate that the defined tasks are renderable, using a dummy
        # target as the required rendering context
        target = Target(
            head='e279a7e06e61c14868ca7d71dea795420aea6539',
            branch='master',
            remote='https://github.com/apache/arrow',
            version='1.0.0dev123',
            email='dummy@example.ltd'
        )

        for task_name, task in self['tasks'].items():
            task = Task(**task)
            files = task.render_files(
                self.template_searchpath,
                params=dict(
                    arrow=target,
                    queue_remote_url='https://github.com/org/crossbow'
                )
            )
            if not files:
                raise CrossbowError('No files have been rendered for task '
                                    '`{}`'.format(task_name))
+
+
# Configure the module level ruamel.yaml serializer used to (de)serialize
# job descriptions into the queue repository's `job.yml` files; registering
# the classes enables round-tripping via the !Job, !Task and !Target tags.
yaml = YAML()
yaml.register_class(Job)
yaml.register_class(Task)
yaml.register_class(Target)
diff --git a/dev/archery/archery/crossbow/reports.py b/dev/archery/archery/crossbow/reports.py
new file mode 100644
index 0000000..bc82db7
--- /dev/null
+++ b/dev/archery/archery/crossbow/reports.py
@@ -0,0 +1,302 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import click
+import collections
+import operator
+import functools
+from io import StringIO
+import textwrap
+
+
+# TODO(kszucs): use archery.report.JinjaReport instead
# TODO(kszucs): use archery.report.JinjaReport instead
class Report:
    """Base class for the different presentations of a crossbow Job."""

    def __init__(self, job):
        self.job = job

    def show(self):
        """Render the report; must be implemented by subclasses."""
        raise NotImplementedError()
+
+
class ConsoleReport(Report):
    """Report the status of a Job to the console using click"""

    # output table's header template
    HEADER = '[{state:>7}] {branch:<52} {content:>16}'
    DETAILS = ' └ {url}'

    # output table's row template for assets
    ARTIFACT_NAME = '{artifact:>69} '
    ARTIFACT_STATE = '[{state:>7}]'

    # state color mapping to highlight console output
    COLORS = {
        # from CombinedStatus
        'error': 'red',
        'failure': 'red',
        'pending': 'yellow',
        'success': 'green',
        # custom state messages
        'ok': 'green',
        'missing': 'red'
    }

    def lead(self, state, branch, n_uploaded, n_expected):
        """Render a task's colored summary row."""
        line = self.HEADER.format(
            state=state.upper(),
            branch=branch,
            content='uploaded {} / {}'.format(n_uploaded, n_expected)
        )
        return click.style(line, fg=self.COLORS[state.lower()])

    def header(self):
        """Render the table header with a delimiter line below it."""
        header = self.HEADER.format(
            state='state',
            branch='Task / Branch',
            content='Artifacts'
        )
        delimiter = '-' * len(header)
        return '{}\n{}'.format(header, delimiter)

    def artifact(self, state, pattern, asset):
        """Render a row for a single artifact pattern / asset pair."""
        if asset is None:
            artifact = pattern
            state = 'pending' if state == 'pending' else 'missing'
        else:
            artifact = asset.name
            state = 'ok'

        name_ = self.ARTIFACT_NAME.format(artifact=artifact)
        state_ = click.style(
            self.ARTIFACT_STATE.format(state=state.upper()),
            self.COLORS[state]
        )
        return name_ + state_

    def show(self, outstream, asset_callback=None):
        """Write the whole job status table to ``outstream``.

        ``asset_callback`` is invoked as (task_name, task, asset) for every
        artifact, e.g. to download the uploaded assets on the fly.
        """
        echo = functools.partial(click.echo, file=outstream)

        # write table's header
        echo(self.header())

        # write table's body
        # (removed leftover commented-out debug filter on the task names)
        for task_name, task in sorted(self.job.tasks.items()):
            # write summary of the uploaded vs total assets
            status = task.status()
            assets = task.assets()

            # mapping of artifact pattern to asset or None of not uploaded
            n_expected = len(task.artifacts)
            n_uploaded = len(assets.uploaded_assets())
            echo(self.lead(status.combined_state, task_name, n_uploaded,
                           n_expected))

            # show link to the actual build, some of the CI providers implement
            # the statuses API others implement the checks API, so display both
            for link in status.build_links:
                echo(self.DETAILS.format(url=link))

            # write per asset status
            for artifact_pattern, asset in assets.items():
                if asset_callback is not None:
                    asset_callback(task_name, task, asset)
                echo(self.artifact(status.combined_state, artifact_pattern,
                                   asset))
+
+
class EmailReport(Report):
    """Render and send a plain-text nightly build report email."""

    HEADER = textwrap.dedent("""
        Arrow Build Report for Job {job_name}

        All tasks: {all_tasks_url}
    """)

    TASK = textwrap.dedent("""
      - {name}:
        URL: {url}
    """).strip()

    EMAIL = textwrap.dedent("""
        From: {sender_name} <{sender_email}>
        To: {recipient_email}
        Subject: {subject}

        {body}
    """).strip()

    STATUS_HEADERS = {
        # from CombinedStatus
        'error': 'Errored Tasks:',
        'failure': 'Failed Tasks:',
        'pending': 'Pending Tasks:',
        'success': 'Succeeded Tasks:',
    }

    def __init__(self, job, sender_name, sender_email, recipient_email):
        self.sender_name = sender_name
        self.sender_email = sender_email
        self.recipient_email = recipient_email
        super().__init__(job)

    def url(self, query):
        """Return the queue's branch listing URL filtered by ``query``."""
        # BUG FIX: the original used `.strip('.git')` which removes any
        # combination of the characters '.', 'g', 'i', 't' from both ends
        # of the URL (mangling e.g. repository names ending in 'g'); only
        # the '.git' suffix should be removed
        repo_url = self.job.queue.remote_url
        if repo_url.endswith('.git'):
            repo_url = repo_url[:-len('.git')]
        return '{}/branches/all?query={}'.format(repo_url, query)

    def listing(self, tasks):
        """Render a sorted, line-per-task listing."""
        return '\n'.join(
            sorted(
                self.TASK.format(name=task_name, url=self.url(task.branch))
                for task_name, task in tasks.items()
            )
        )

    def header(self):
        """Render the report header with the link listing all tasks."""
        url = self.url(self.job.branch)
        return self.HEADER.format(job_name=self.job.branch, all_tasks_url=url)

    def subject(self):
        """Email subject line for the job."""
        return (
            "[NIGHTLY] Arrow Build Report for Job {}".format(self.job.branch)
        )

    def body(self):
        """Render the email body grouping the tasks by their state."""
        buffer = StringIO()
        buffer.write(self.header())

        tasks_by_state = collections.defaultdict(dict)
        for task_name, task in self.job.tasks.items():
            state = task.status().combined_state
            tasks_by_state[state][task_name] = task

        for state in ('failure', 'error', 'pending', 'success'):
            if state in tasks_by_state:
                tasks = tasks_by_state[state]
                buffer.write('\n')
                buffer.write(self.STATUS_HEADERS[state])
                buffer.write('\n')
                buffer.write(self.listing(tasks))
                buffer.write('\n')

        return buffer.getvalue()

    def email(self):
        """Render the complete email including the headers."""
        return self.EMAIL.format(
            sender_name=self.sender_name,
            sender_email=self.sender_email,
            recipient_email=self.recipient_email,
            subject=self.subject(),
            body=self.body()
        )

    def show(self, outstream):
        outstream.write(self.email())

    def send(self, smtp_user, smtp_password, smtp_server, smtp_port):
        """Send the rendered email through an SSL SMTP connection."""
        import smtplib

        email = self.email()

        server = smtplib.SMTP_SSL(smtp_server, smtp_port)
        server.ehlo()
        server.login(smtp_user, smtp_password)
        server.sendmail(smtp_user, self.recipient_email, email)
        server.close()
+
+
class CommentReport(Report):
    """Render a job summary as a GitHub comment with per-task CI badges."""

    _markdown_badge = '[![{title}]({badge})]({url})'

    # per CI service markdown badge templates; the {repo}/{branch}
    # placeholders are substituted when the comment is rendered
    badges = {
        'github': _markdown_badge.format(
            title='Github Actions',
            url='https://github.com/{repo}/actions?query=branch:{branch}',
            badge=(
                'https://github.com/{repo}/workflows/Crossbow/'
                'badge.svg?branch={branch}'
            ),
        ),
        'azure': _markdown_badge.format(
            title='Azure',
            url=(
                'https://dev.azure.com/{repo}/_build/latest'
                '?definitionId=1&branchName={branch}'
            ),
            badge=(
                'https://dev.azure.com/{repo}/_apis/build/status/'
                '{repo_dotted}?branchName={branch}'
            )
        ),
        'travis': _markdown_badge.format(
            title='TravisCI',
            url='https://travis-ci.com/{repo}/branches',
            badge='https://img.shields.io/travis/{repo}/{branch}.svg'
        ),
        'circle': _markdown_badge.format(
            title='CircleCI',
            url='https://circleci.com/gh/{repo}/tree/{branch}',
            badge=(
                'https://img.shields.io/circleci/build/github'
                '/{repo}/{branch}.svg'
            )
        ),
        'appveyor': _markdown_badge.format(
            title='Appveyor',
            url='https://ci.appveyor.com/project/{repo}/history',
            badge='https://img.shields.io/appveyor/ci/{repo}/{branch}.svg'
        ),
        'drone': _markdown_badge.format(
            title='Drone',
            url='https://cloud.drone.io/{repo}',
            badge='https://img.shields.io/drone/build/{repo}/{branch}.svg'
        ),
    }

    def __init__(self, job, crossbow_repo):
        # crossbow_repo is the 'owner/name' slug of the queue repository
        self.crossbow_repo = crossbow_repo
        super().__init__(job)

    def show(self):
        """Return the comment markdown with one badge row per task."""
        url = 'https://github.com/{repo}/branches/all?query={branch}'
        sha = self.job.target.head

        msg = 'Revision: {}\n\n'.format(sha)
        msg += 'Submitted crossbow builds: [{repo} @ {branch}]'
        msg += '({})\n'.format(url)
        msg += '\n|Task|Status|\n|----|------|'

        tasks = sorted(self.job.tasks.items(), key=operator.itemgetter(0))
        for key, task in tasks:
            branch = task.branch

            try:
                template = self.badges[task.ci]
                badge = template.format(
                    repo=self.crossbow_repo,
                    repo_dotted=self.crossbow_repo.replace('/', '.'),
                    branch=branch
                )
            except KeyError:
                badge = 'unsupported CI service `{}`'.format(task.ci)

            msg += '\n|{}|{}|'.format(key, badge)

        # the {repo}/{branch} placeholders accumulated above are filled last
        return msg.format(repo=self.crossbow_repo, branch=self.job.branch)
diff --git a/dev/archery/archery/tests/fixtures/crossbow-job.yaml b/dev/archery/archery/crossbow/tests/fixtures/crossbow-job.yaml
similarity index 100%
rename from dev/archery/archery/tests/fixtures/crossbow-job.yaml
rename to dev/archery/archery/crossbow/tests/fixtures/crossbow-job.yaml
diff --git a/dev/archery/archery/tests/fixtures/crossbow-success-message.md b/dev/archery/archery/crossbow/tests/fixtures/crossbow-success-message.md
similarity index 100%
rename from dev/archery/archery/tests/fixtures/crossbow-success-message.md
rename to dev/archery/archery/crossbow/tests/fixtures/crossbow-success-message.md
diff --git a/dev/archery/archery/utils/crossbow.py b/dev/archery/archery/crossbow/tests/test_core.py
similarity index 78%
copy from dev/archery/archery/utils/crossbow.py
copy to dev/archery/archery/crossbow/tests/test_core.py
index f28b976..5184742 100644
--- a/dev/archery/archery/utils/crossbow.py
+++ b/dev/archery/archery/crossbow/tests/test_core.py
@@ -15,9 +15,11 @@
# specific language governing permissions and limitations
# under the License.
-from .command import Command, default_bin
+from archery.utils.source import ArrowSources
+from archery.crossbow import Config
-class Crossbow(Command):
- def __init__(self, crossbow_bin=None):
- self.bin = default_bin(crossbow_bin, "arrow/dev/tasks/crossbow.py")
+def test_config():
+ src = ArrowSources.find()
+ conf = Config.load_yaml(src.dev / "tasks" / "tasks.yml")
+ conf.validate()
diff --git a/dev/archery/archery/crossbow/tests/test_crossbow_cli.py b/dev/archery/archery/crossbow/tests/test_crossbow_cli.py
new file mode 100644
index 0000000..ee9ba1e
--- /dev/null
+++ b/dev/archery/archery/crossbow/tests/test_crossbow_cli.py
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from click.testing import CliRunner
+import pytest
+
+from archery.crossbow.cli import crossbow
+from archery.utils.git import git
+
+
+@pytest.mark.integration
+def test_crossbow_submit(tmp_path):
+ runner = CliRunner()
+
+ def invoke(*args):
+ return runner.invoke(crossbow, ['--queue-path', str(tmp_path), *args])
+
+ # initialize an empty crossbow repository
+ git.run_cmd("init", str(tmp_path))
+ git.run_cmd("-C", str(tmp_path), "remote", "add", "origin",
+ "https://github.com/dummy/repo")
+ git.run_cmd("-C", str(tmp_path), "commit", "-m", "initial",
+ "--allow-empty")
+
+ result = invoke('check-config')
+ assert result.exit_code == 0
+
+ result = invoke('submit', '--no-fetch', '--no-push', '-g', 'wheel')
+ assert result.exit_code == 0
diff --git a/dev/archery/archery/crossbow/tests/test_reports.py b/dev/archery/archery/crossbow/tests/test_reports.py
new file mode 100644
index 0000000..0df292b
--- /dev/null
+++ b/dev/archery/archery/crossbow/tests/test_reports.py
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import textwrap
+
+from archery.crossbow.core import yaml
+from archery.crossbow.reports import CommentReport
+
+
+def test_crossbow_comment_formatter(load_fixture):
+ msg = load_fixture('crossbow-success-message.md')
+ job = load_fixture('crossbow-job.yaml', decoder=yaml.load)
+
+ report = CommentReport(job, crossbow_repo='ursa-labs/crossbow')
+ expected = msg.format(
+ repo='ursa-labs/crossbow',
+ branch='ursabot-1',
+ revision='f766a1d615dd1b7ee706d05102e579195951a61c',
+ status='has been succeeded.'
+ )
+ assert report.show() == textwrap.dedent(expected).strip()
diff --git a/dev/archery/archery/tests/test_bot.py b/dev/archery/archery/tests/test_bot.py
index 9b2d023..e00853c 100644
--- a/dev/archery/archery/tests/test_bot.py
+++ b/dev/archery/archery/tests/test_bot.py
@@ -16,18 +16,13 @@
# under the License.
import json
-from pathlib import Path
from unittest.mock import Mock
-import pytest
-import textwrap
-import responses as rsps
import click
-from ruamel.yaml import YAML
+import pytest
+import responses as rsps
-from archery.bot import (
- CommentBot, CommandError, CrossbowCommentFormatter, group
-)
+from archery.bot import CommentBot, CommandError, group
@pytest.fixture
@@ -36,18 +31,6 @@
yield mock
-def load_fixture(name):
- path = Path(__file__).parent / 'fixtures' / name
- with path.open('r') as fp:
- if name.endswith('.json'):
- return json.load(fp)
- elif name.endswith('.yaml'):
- yaml = YAML()
- return yaml.load(fp)
- else:
- return fp.read()
-
-
def github_url(path):
return 'https://api.github.com:443/{}'.format(path.strip('/'))
@@ -86,21 +69,6 @@
assert custom_handler('extra', extra='data') == {'extra': 'data'}
-def test_crossbow_comment_formatter():
- job = load_fixture('crossbow-job.yaml')
- msg = load_fixture('crossbow-success-message.md')
-
- formatter = CrossbowCommentFormatter(crossbow_repo='ursa-labs/crossbow')
- response = formatter.render(job)
- expected = msg.format(
- repo='ursa-labs/crossbow',
- branch='ursabot-1',
- revision='f766a1d615dd1b7ee706d05102e579195951a61c',
- status='has been succeeded.'
- )
- assert response == textwrap.dedent(expected).strip()
-
-
@pytest.mark.parametrize('fixture_name', [
# the bot is not mentioned, nothing to do
'event-issue-comment-not-mentioning-ursabot.json',
@@ -109,7 +77,7 @@
# non-authorized user sent the comment, do not respond
'event-issue-comment-by-non-authorized-user.json',
])
-def test_noop_events(fixture_name):
+def test_noop_events(load_fixture, fixture_name):
payload = load_fixture(fixture_name)
handler = Mock()
@@ -119,7 +87,7 @@
handler.assert_not_called()
-def test_issue_comment_without_pull_request(responses):
+def test_issue_comment_without_pull_request(load_fixture, responses):
responses.add(
responses.GET,
github_url('/repositories/169101701/issues/19'),
@@ -151,7 +119,7 @@
}
-def test_respond_with_usage(responses):
+def test_respond_with_usage(load_fixture, responses):
responses.add(
responses.GET,
github_url('/repositories/169101701/issues/26'),
@@ -190,7 +158,8 @@
('@ursabot build', '+1'),
('@ursabot listen', '-1'),
])
-def test_issue_comment_with_commands(responses, command, reaction):
+def test_issue_comment_with_commands(load_fixture, responses, command,
+ reaction):
responses.add(
responses.GET,
github_url('/repositories/169101701/issues/26'),
@@ -230,13 +199,3 @@
post = responses.calls[3]
assert json.loads(post.request.body) == {'content': reaction}
-
-
-# TODO(kszucs): properly mock it
-# def test_crossbow_submit():
-# from click.testing import CliRunner
-# runner = CliRunner()
-# result = runner.invoke(
-# bot, ['crossbow', 'submit', '-g', 'wheel', '--dry-run']
-# )
-# assert result.exit_code == 0
diff --git a/dev/archery/conftest.py b/dev/archery/conftest.py
new file mode 100644
index 0000000..06a643b
--- /dev/null
+++ b/dev/archery/conftest.py
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+
+import pytest
+
+
+def pytest_addoption(parser):
+ parser.addoption(
+ "--enable-integration",
+ action="store_true",
+ default=False,
+ help="run slow tests"
+ )
+
+
+def pytest_configure(config):
+ config.addinivalue_line(
+ "markers",
+ (
+ "integration: mark test as integration tests involving more "
+ "extensive setup (only used for crossbow at the moment)"
+ )
+ )
+
+
+def pytest_collection_modifyitems(config, items):
+ if config.getoption("--enable-integration"):
+ return
+ marker = pytest.mark.skip(reason="need --enable-integration option to run")
+ for item in items:
+ if "integration" in item.keywords:
+ item.add_marker(marker)
+
+
+@pytest.fixture
+def load_fixture(request):
+ current_test_directory = pathlib.Path(request.node.fspath).parent
+
+ def decoder(path):
+ with path.open('r') as fp:
+ if path.suffix == '.json':
+ import json
+ return json.load(fp)
+ elif path.suffix == '.yaml':
+ import yaml
+ return yaml.load(fp)
+ else:
+ return fp.read()
+
+ def loader(name, decoder=decoder):
+ path = current_test_directory / 'fixtures' / name
+ return decoder(path)
+
+ return loader
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index 2696c0a..892e6b2 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -26,12 +26,12 @@
extras = {
'benchmark': ['pandas'],
- # bot extra includes crossbow's dependencies
- 'bot': ['ruamel.yaml', 'pygithub', 'github3.py', 'jinja2', 'jira',
- 'pygit2', 'setuptools_scm', 'toolz'],
'docker': ['ruamel.yaml', 'python-dotenv'],
- 'release': ['jinja2', 'jira', 'semver', 'gitpython']
+ 'release': ['jinja2', 'jira', 'semver', 'gitpython'],
+ 'crossbow': ['github3.py', 'jinja2', 'pygit2', 'ruamel.yaml',
+ 'setuptools_scm'],
}
+extras['bot'] = extras['crossbow'] + ['pygithub', 'jira']
extras['all'] = list(set(functools.reduce(operator.add, extras.values())))
setup(
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index e51cd20..716a1b9 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -47,6 +47,7 @@
dev/archery/MANIFEST.in
dev/archery/requirements*.txt
dev/archery/archery/tests/fixtures/*
+dev/archery/archery/crossbow/tests/fixtures/*
dev/release/rat_exclude_files.txt
dev/tasks/homebrew-formulae/apache-arrow.rb
dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/apache-arrow-archive-keyring.install
diff --git a/dev/tasks/conda-recipes/azure.linux.yml b/dev/tasks/conda-recipes/azure.linux.yml
index 3ab9504..161fd14 100755
--- a/dev/tasks/conda-recipes/azure.linux.yml
+++ b/dev/tasks/conda-recipes/azure.linux.yml
@@ -1,3 +1,5 @@
+{% import 'macros.jinja' as macros with context %}
+
jobs:
- job: linux
pool:
@@ -19,21 +21,11 @@
displayName: Configure binfmt_misc
condition: not(startsWith(variables['CONFIG'], 'linux_64'))
- - script: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- displayName: Clone arrow
+ {{ macros.azure_checkout_arrow() }}
- # Using github release tries to find a common ancestor between the
- # currently pushed tag and the latest tag of the github repository
- # (don't know why).
- # The tag upload took 43 minutes because of this scan, so use an
- # alternative upload script.
- task: CondaEnvironment@1
inputs:
- packageSpecs: 'click github3.py jinja2 jira pygit2 ruamel.yaml setuptools_scm toolz anaconda-client shyaml'
+ packageSpecs: 'anaconda-client shyaml'
installOptions: '-c conda-forge'
updateConda: false
@@ -42,22 +34,5 @@
CI=azure arrow/dev/tasks/conda-recipes/run_docker_build.sh $(pwd)/build_artifacts
displayName: Run docker build
- # Upload to github releases
- - script: |
- python arrow/dev/tasks/crossbow.py \
- --queue-path . \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --sha {{ task.branch }} \
- --tag {{ task.tag }} \
- --pattern "build_artifacts/linux-64/*.tar.bz2"
- env:
- CROSSBOW_GITHUB_TOKEN: $(CROSSBOW_GITHUB_TOKEN)
- displayName: Upload packages as a GitHub release
-
- {% if arrow.branch == 'master' %}
- # Upload to custom anaconda channel
- - script: |
- anaconda -t $(CROSSBOW_ANACONDA_TOKEN) upload --force build_artifacts/linux-64/*.tar.bz2
- displayName: Upload packages to Anaconda
- {% endif %}
+ {{ macros.azure_upload_releases("build_artifacts/linux-64/*.tar.bz2") }}
+ {{ macros.azure_upload_anaconda("build_artifacts/linux-64/*.tar.bz2") }}
diff --git a/dev/tasks/conda-recipes/azure.osx.yml b/dev/tasks/conda-recipes/azure.osx.yml
index 3251754..58afa80 100755
--- a/dev/tasks/conda-recipes/azure.osx.yml
+++ b/dev/tasks/conda-recipes/azure.osx.yml
@@ -1,3 +1,5 @@
+{% import 'macros.jinja' as macros with context %}
+
jobs:
- job: osx
pool:
@@ -27,12 +29,7 @@
conda install -n base -c conda-forge --quiet --yes conda-forge-ci-setup=3 conda-build
displayName: 'Add conda-forge-ci-setup=3'
- - script: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- displayName: Clone arrow
+ {{ macros.azure_checkout_arrow() }}
- script: |
source activate base
@@ -44,9 +41,8 @@
conda update --yes --quiet --override-channels -c conda-forge -c defaults --all
displayName: Configure conda and conda-build
workingDirectory: arrow/dev/tasks/conda-recipes
- env: {
+ env:
OSX_FORCE_SDK_DOWNLOAD: "1"
- }
- script: |
source activate base
@@ -75,31 +71,5 @@
workingDirectory: arrow/dev/tasks/conda-recipes
displayName: Build recipes
- # Using github release tries to find a common ancestor between the
- # currently pushed tag and the latest tag of the github repository
- # (don't know why).
- # The tag upload took 43 minutes because of this scan, so use an
- # alternative upload script.
- - script: |
- source activate base
- conda install -y click github3.py jinja2 jira pygit2 ruamel.yaml setuptools_scm toolz
- python arrow/dev/tasks/crossbow.py \
- --queue-path . \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --sha {{ task.branch }} \
- --tag {{ task.tag }} \
- --pattern "arrow/dev/tasks/conda-recipes/build_artifacts/osx-64/*.tar.bz2"
- env:
- CROSSBOW_GITHUB_TOKEN: $(CROSSBOW_GITHUB_TOKEN)
- displayName: Upload packages as a GitHub release
-
- {% if arrow.branch == 'master' %}
- # Upload to custom anaconda channel
- - script: |
- source activate base
- conda install -y anaconda-client
- anaconda -t $(CROSSBOW_ANACONDA_TOKEN) upload --force build_artifacts/osx-64/*.tar.bz2
- displayName: Upload packages to Anaconda
- workingDirectory: arrow/dev/tasks/conda-recipes
- {% endif %}
+ {{ macros.azure_upload_releases("arrow/dev/tasks/conda-recipes/build_artifacts/osx-64/*.tar.bz2") }}
+ {{ macros.azure_upload_anaconda("arrow/dev/tasks/conda-recipes/build_artifacts/osx-64/*.tar.bz2") }}
diff --git a/dev/tasks/conda-recipes/azure.win.yml b/dev/tasks/conda-recipes/azure.win.yml
index d432274..a3ec693 100755
--- a/dev/tasks/conda-recipes/azure.win.yml
+++ b/dev/tasks/conda-recipes/azure.win.yml
@@ -1,3 +1,5 @@
+{% import 'macros.jinja' as macros with context %}
+
jobs:
- job: win
pool:
@@ -41,12 +43,7 @@
displayName: Install conda-build and activate environment
- script: set PYTHONUNBUFFERED=1
- - script: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- displayName: Clone arrow
+ {{ macros.azure_checkout_arrow()|indent(2) }}
# Configure the VM
- script: setup_conda_rc .\ .\ .\.ci_support\%CONFIG%.yaml
@@ -76,34 +73,5 @@
PYTHONUNBUFFERED: 1
condition: contains(variables['R_CONFIG'], 'win')
- # Using github release tries to find a common ancestor between the
- # currently pushed tag and the latest tag of the github repository
- # (don't know why).
- # The tag upload took 43 minutes because of this scan, so use an
- # alternative upload script.
- - task: CondaEnvironment@1
- inputs:
- packageSpecs: 'click github3.py jinja2 jira pygit2 ruamel.yaml setuptools_scm toolz'
- installOptions: '-c conda-forge'
- updateConda: false
- - script: |
- python arrow/dev/tasks/crossbow.py ^
- --queue-path . ^
- --queue-remote {{ queue_remote_url }} ^
- upload-artifacts ^
- --sha {{ task.branch }} ^
- --tag {{ task.tag }} ^
- --pattern "D:\bld\win-64\*.tar.bz2"
- env:
- CROSSBOW_GITHUB_TOKEN: $(CROSSBOW_GITHUB_TOKEN)
- displayName: Upload packages as a GitHub release
-
- {% if arrow.branch == 'master' %}
- # Upload to custom anaconda channel
- - script: |
- source activate base
- conda install -y anaconda-client
- anaconda -t $(CROSSBOW_ANACONDA_TOKEN) upload --force D:\bld\win-64\*.tar.bz2
- displayName: Upload packages to Anaconda
- workingDirectory: arrow/dev/tasks/conda-recipes
- {% endif %}
+ {{ macros.azure_upload_releases("D:\bld\win-64\*.tar.bz2")|indent(2) }}
+ {{ macros.azure_upload_anaconda("D:\bld\win-64\*.tar.bz2")|indent(2) }}
diff --git a/dev/tasks/conda-recipes/drone-steps.sh b/dev/tasks/conda-recipes/drone-steps.sh
index 7156410..dffdb41 100755
--- a/dev/tasks/conda-recipes/drone-steps.sh
+++ b/dev/tasks/conda-recipes/drone-steps.sh
@@ -12,9 +12,11 @@
$FEEDSTOCK_ROOT/build_steps.sh ${OUTPUT_DIR}
# Upload as Github release
-mamba install -y click github3.py jinja2 jira pygit2 ruamel.yaml setuptools_scm toolz anaconda-client shyaml -c conda-forge
+mamba install -y anaconda-client shyaml -c conda-forge
+
pushd $DRONE_WORKSPACE
-python arrow/dev/tasks/crossbow.py \
+pip install -e arrow/dev/archery[crossbow]
+archery crossbow \
--queue-path . \
--queue-remote ${QUEUE_REMOTE_URL} \
upload-artifacts \
diff --git a/dev/tasks/crossbow.py b/dev/tasks/crossbow.py
deleted file mode 100755
index 2874472..0000000
--- a/dev/tasks/crossbow.py
+++ /dev/null
@@ -1,1681 +0,0 @@
-#!/usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-import re
-import fnmatch
-import glob
-import time
-import logging
-import mimetypes
-import subprocess
-import textwrap
-from io import StringIO
-from pathlib import Path
-from textwrap import dedent
-from datetime import date
-from functools import partial
-
-import click
-import toolz
-
-from ruamel.yaml import YAML
-
-try:
- import github3
- _have_github3 = True
-except ImportError:
- github3 = object
- _have_github3 = False
-
-try:
- import pygit2
-except ImportError:
- PygitRemoteCallbacks = object
-else:
- PygitRemoteCallbacks = pygit2.RemoteCallbacks
-
-
-# initialize logging
-logging.basicConfig()
-logging.getLogger().setLevel(logging.ERROR)
-
-# enable verbose logging for requests
-# http_client.HTTPConnection.debuglevel = 1
-requests_log = logging.getLogger("requests.packages.urllib3")
-requests_log.setLevel(logging.ERROR)
-requests_log.propagate = True
-
-
-CWD = Path(__file__).parent.absolute()
-
-
-def unflatten(mapping):
- """Converts a flat tuple => object mapping to hierarchical one"""
- result = {}
- for path, value in mapping.items():
- parents, leaf = path[:-1], path[-1]
- # create the hierarchy until we reach the leaf value
- temp = result
- for parent in parents:
- temp.setdefault(parent, {})
- temp = temp[parent]
- # set the leaf value
- temp[leaf] = value
-
- return result
-
-
-def unflatten_tree(files):
- """Converts a flat path => object mapping to a hierarchical directories
-
- Input:
- {
- 'path/to/file.a': a_content,
- 'path/to/file.b': b_content,
- 'path/file.c': c_content
- }
- Output:
- {
- 'path': {
- 'to': {
- 'file.a': a_content,
- 'file.b': b_content
- },
- 'file.c': c_content
- }
- }
- """
- files = toolz.keymap(lambda path: tuple(path.split('/')), files)
- return unflatten(files)
-
-
-# configurations for setting up branch skipping
-# - appveyor has a feature to skip builds without an appveyor.yml
-# - travis reads from the master branch and applies the rules
-# - circle requires the configuration to be present on all branch, even ones
-# that are configured to be skipped
-# - azure skips branches without azure-pipelines.yml by default
-# - github skips branches without .github/workflows/ by default
-
-_default_travis_yml = """
-branches:
- only:
- - master
- - /.*-travis-.*/
-
-os: linux
-dist: trusty
-language: generic
-"""
-
-_default_circle_yml = """
-version: 2
-
-jobs:
- build:
- machine: true
-
-workflows:
- version: 2
- build:
- jobs:
- - build:
- filters:
- branches:
- only:
- - /.*-circle-.*/
-"""
-
-_default_tree = {
- '.travis.yml': _default_travis_yml,
- '.circleci/config.yml': _default_circle_yml
-}
-
-
-class GitRemoteCallbacks(PygitRemoteCallbacks):
-
- def __init__(self, token):
- self.token = token
- self.attempts = 0
- super().__init__()
-
- def push_update_reference(self, refname, message):
- pass
-
- def update_tips(self, refname, old, new):
- pass
-
- def credentials(self, url, username_from_url, allowed_types):
- # its a libgit2 bug, that it infinitely retries the authentication
- self.attempts += 1
-
- if self.attempts >= 5:
- # pygit2 doesn't propagate the exception properly
- msg = 'Wrong oauth personal access token'
- print(msg)
- raise ValueError(msg)
-
- if allowed_types & pygit2.credentials.GIT_CREDTYPE_USERPASS_PLAINTEXT:
- return pygit2.UserPass(self.token, 'x-oauth-basic')
- else:
- return None
-
-
-def _git_ssh_to_https(url):
- return url.replace('git@github.com:', 'https://github.com/')
-
-
-class Repo:
- """Base class for interaction with local git repositories
-
- A high level wrapper used for both reading revision information from
- arrow's repository and pushing continuous integration tasks to the queue
- repository.
-
- Parameters
- ----------
- require_https : boolean, default False
- Raise exception for SSH origin URLs
- """
- def __init__(self, path, github_token=None, remote_url=None,
- require_https=False):
- self.path = Path(path)
- self.github_token = github_token
- self.require_https = require_https
- self._remote_url = remote_url
- self._pygit_repo = None
- self._github_repo = None # set by as_github_repo()
- self._updated_refs = []
-
- def __str__(self):
- tpl = dedent('''
- Repo: {remote}@{branch}
- Commit: {head}
- ''')
- return tpl.format(
- remote=self.remote_url,
- branch=self.branch.branch_name,
- head=self.head
- )
-
- @property
- def repo(self):
- if self._pygit_repo is None:
- self._pygit_repo = pygit2.Repository(str(self.path))
- return self._pygit_repo
-
- @property
- def origin(self):
- remote = self.repo.remotes['origin']
- if self.require_https and remote.url.startswith('git@github.com'):
- raise ValueError("Change SSH origin URL to HTTPS to use "
- "Crossbow: {}".format(remote.url))
- return remote
-
- def fetch(self):
- refspec = '+refs/heads/*:refs/remotes/origin/*'
- self.origin.fetch([refspec])
-
- def push(self, refs=None, github_token=None):
- github_token = github_token or self.github_token
- if github_token is None:
- raise click.ClickException(
- 'Could not determine GitHub token. Please set the '
- 'CROSSBOW_GITHUB_TOKEN environment variable to a '
- 'valid GitHub access token or pass one to --github-token.'
- )
- callbacks = GitRemoteCallbacks(github_token)
- refs = refs or []
- try:
- self.origin.push(refs + self._updated_refs, callbacks=callbacks)
- except pygit2.GitError:
- raise RuntimeError('Failed to push updated references, '
- 'potentially because of credential issues: {}'
- .format(self._updated_refs))
- else:
- self.updated_refs = []
-
- @property
- def head(self):
- """Currently checked out commit's sha"""
- return self.repo.head
-
- @property
- def branch(self):
- """Currently checked out branch"""
- try:
- return self.repo.branches[self.repo.head.shorthand]
- except KeyError:
- return None # detached
-
- @property
- def remote(self):
- """Currently checked out branch's remote counterpart"""
- try:
- return self.repo.remotes[self.branch.upstream.remote_name]
- except (AttributeError, KeyError):
- return None # cannot detect
-
- @property
- def remote_url(self):
- """Currently checked out branch's remote counterpart URL
-
- If an SSH github url is set, it will be replaced by the https
- equivalent usable with GitHub OAuth token.
- """
- try:
- return self._remote_url or _git_ssh_to_https(self.remote.url)
- except AttributeError:
- return None
-
- @property
- def user_name(self):
- try:
- return next(self.repo.config.get_multivar('user.name'))
- except StopIteration:
- return os.environ.get('GIT_COMMITTER_NAME', 'unknown')
-
- @property
- def user_email(self):
- try:
- return next(self.repo.config.get_multivar('user.email'))
- except StopIteration:
- return os.environ.get('GIT_COMMITTER_EMAIL', 'unknown')
-
- @property
- def signature(self):
- return pygit2.Signature(self.user_name, self.user_email,
- int(time.time()))
-
- def create_tree(self, files):
- builder = self.repo.TreeBuilder()
-
- for filename, content in files.items():
- if isinstance(content, dict):
- # create a subtree
- tree_id = self.create_tree(content)
- builder.insert(filename, tree_id, pygit2.GIT_FILEMODE_TREE)
- else:
- # create a file
- blob_id = self.repo.create_blob(content)
- builder.insert(filename, blob_id, pygit2.GIT_FILEMODE_BLOB)
-
- tree_id = builder.write()
- return tree_id
-
- def create_commit(self, files, parents=None, message='',
- reference_name=None):
- if parents is None:
- # by default use the main branch as the base of the new branch
- # required to reuse github actions cache across crossbow tasks
- commit, _ = self.repo.resolve_refish("master")
- parents = [commit.id]
- tree_id = self.create_tree(files)
-
- author = committer = self.signature
- commit_id = self.repo.create_commit(reference_name, author, committer,
- message, tree_id, parents)
- return self.repo[commit_id]
-
- def create_branch(self, branch_name, files, parents=None, message='',
- signature=None):
- # create commit with the passed tree
- commit = self.create_commit(files, parents=parents, message=message)
-
- # create branch pointing to the previously created commit
- branch = self.repo.create_branch(branch_name, commit)
-
- # append to the pushable references
- self._updated_refs.append('refs/heads/{}'.format(branch_name))
-
- return branch
-
- def create_tag(self, tag_name, commit_id, message=''):
- tag_id = self.repo.create_tag(tag_name, commit_id,
- pygit2.GIT_OBJ_COMMIT, self.signature,
- message)
-
- # append to the pushable references
- self._updated_refs.append('refs/tags/{}'.format(tag_name))
-
- return self.repo[tag_id]
-
- def file_contents(self, commit_id, file):
- commit = self.repo[commit_id]
- entry = commit.tree[file]
- blob = self.repo[entry.id]
- return blob.data
-
- def _parse_github_user_repo(self):
- m = re.match(r'.*\/([^\/]+)\/([^\/\.]+)(\.git)?$', self.remote_url)
- if m is None:
- raise ValueError("Unable to parse the github owner and repository "
- "from the repository's remote url '{}'"
- .format(self.remote_url))
- user, repo = m.group(1), m.group(2)
- return user, repo
-
- def as_github_repo(self, github_token=None):
- """Converts it to a repository object which wraps the GitHub API"""
- if self._github_repo is None:
- if not _have_github3:
- raise ImportError('Must install github3.py')
- github_token = github_token or self.github_token
- username, reponame = self._parse_github_user_repo()
- session = github3.session.GitHubSession(
- default_connect_timeout=10,
- default_read_timeout=30
- )
- github = github3.GitHub(session=session)
- github.login(token=github_token)
- self._github_repo = github.repository(username, reponame)
- return self._github_repo
-
- def github_commit(self, sha):
- repo = self.as_github_repo()
- return repo.commit(sha)
-
- def github_release(self, tag):
- repo = self.as_github_repo()
- try:
- return repo.release_from_tag(tag)
- except github3.exceptions.NotFoundError:
- return None
-
- def github_upload_asset_requests(self, release, path, name, mime,
- max_retries=None, retry_backoff=None):
- if max_retries is None:
- max_retries = int(os.environ.get('CROSSBOW_MAX_RETRIES', 8))
- if retry_backoff is None:
- retry_backoff = int(os.environ.get('CROSSBOW_RETRY_BACKOFF', 5))
-
- for i in range(max_retries):
- try:
- with open(path, 'rb') as fp:
- result = release.upload_asset(name=name, asset=fp,
- content_type=mime)
- except github3.exceptions.ResponseError as e:
- click.echo('Attempt {} has failed with message: {}.'
- .format(i + 1, str(e)))
- click.echo('Error message {}'.format(e.msg))
- click.echo('List of errors provided by Github:')
- for err in e.errors:
- click.echo(' - {}'.format(err))
-
- if e.code == 422:
- # 422 Validation Failed, probably raised because
- # ReleaseAsset already exists, so try to remove it before
- # reattempting the asset upload
- for asset in release.assets():
- if asset.name == name:
- click.echo('Release asset {} already exists, '
- 'removing it...'.format(name))
- asset.delete()
- click.echo('Asset {} removed.'.format(name))
- break
- except github3.exceptions.ConnectionError as e:
- click.echo('Attempt {} has failed with message: {}.'
- .format(i + 1, str(e)))
- else:
- click.echo('Attempt {} has finished.'.format(i + 1))
- return result
-
- time.sleep(retry_backoff)
-
- raise RuntimeError('Github asset uploading has failed!')
-
- def github_upload_asset_curl(self, release, path, name, mime):
- upload_url, _ = release.upload_url.split('{?')
- upload_url += '?name={}'.format(name)
-
- command = [
- 'curl',
- '--fail',
- '-H', "Authorization: token {}".format(self.github_token),
- '-H', "Content-Type: {}".format(mime),
- '--data-binary', '@{}'.format(path),
- upload_url
- ]
- return subprocess.run(command, shell=False, check=True)
-
- def github_overwrite_release_assets(self, tag_name, target_commitish,
- patterns, method='requests'):
- # Since github has changed something the asset uploading via requests
- # got instable, so prefer the cURL alternative.
- # Potential cause:
- # sigmavirus24/github3.py/issues/779#issuecomment-379470626
- repo = self.as_github_repo()
- if not tag_name:
- raise ValueError('Empty tag name')
- if not target_commitish:
- raise ValueError('Empty target commit for the release tag')
-
- # remove the whole release if it already exists
- try:
- release = repo.release_from_tag(tag_name)
- except github3.exceptions.NotFoundError:
- pass
- else:
- release.delete()
-
- release = repo.create_release(tag_name, target_commitish)
- for pattern in patterns:
- for path in glob.glob(pattern, recursive=True):
- name = os.path.basename(path)
- size = os.path.getsize(path)
- mime = mimetypes.guess_type(name)[0] or 'application/zip'
-
- click.echo(
- 'Uploading asset `{}` with mimetype {} and size {}...'
- .format(name, mime, size)
- )
-
- if method == 'requests':
- self.github_upload_asset_requests(release, path, name=name,
- mime=mime)
- elif method == 'curl':
- self.github_upload_asset_curl(release, path, name=name,
- mime=mime)
- else:
- raise ValueError(
- 'Unsupported upload method {}'.format(method)
- )
-
-
-class Queue(Repo):
-
- def _latest_prefix_id(self, prefix):
- pattern = re.compile(r'[\w\/-]*{}-(\d+)'.format(prefix))
- matches = list(filter(None, map(pattern.match, self.repo.branches)))
- if matches:
- latest = max(int(m.group(1)) for m in matches)
- else:
- latest = -1
- return latest
-
- def _next_job_id(self, prefix):
- """Auto increments the branch's identifier based on the prefix"""
- latest_id = self._latest_prefix_id(prefix)
- return '{}-{}'.format(prefix, latest_id + 1)
-
- def latest_for_prefix(self, prefix):
- latest_id = self._latest_prefix_id(prefix)
- if latest_id < 0:
- raise RuntimeError(
- 'No job has been submitted with prefix {} yet'.format(prefix)
- )
- job_name = '{}-{}'.format(prefix, latest_id)
- return self.get(job_name)
-
- def date_of(self, job):
- # it'd be better to bound to the queue repository on deserialization
- # and reorganize these methods to Job
- branch_name = 'origin/{}'.format(job.branch)
- branch = self.repo.branches[branch_name]
- commit = self.repo[branch.target]
- return date.fromtimestamp(commit.commit_time)
-
- def jobs(self, pattern):
- """Return jobs sorted by its identifier in reverse order"""
- job_names = []
- for name in self.repo.branches.remote:
- origin, name = name.split('/', 1)
- result = re.match(pattern, name)
- if result:
- job_names.append(name)
-
- for name in sorted(job_names, reverse=True):
- yield self.get(name)
-
- def get(self, job_name):
- branch_name = 'origin/{}'.format(job_name)
- branch = self.repo.branches[branch_name]
- try:
- content = self.file_contents(branch.target, 'job.yml')
- except KeyError:
- raise ValueError('No job is found with name: {}'.format(job_name))
-
- buffer = StringIO(content.decode('utf-8'))
- job = yaml.load(buffer)
- job.queue = self
- return job
-
- def put(self, job, prefix='build'):
- if not isinstance(job, Job):
- raise ValueError('`job` must be an instance of Job')
- if job.branch is not None:
- raise ValueError('`job.branch` is automatically generated, thus '
- 'it must be blank')
-
- if job.target.remote is None:
- raise RuntimeError(
- 'Cannot determine git remote for the Arrow repository to '
- 'clone or push to, try to push the `{}` branch first to have '
- 'a remote tracking counterpart.'.format(job.target.branch)
- )
- if job.target.branch is None:
- raise RuntimeError(
- 'Cannot determine the current branch of the Arrow repository '
- 'to clone or push to, perhaps it is in detached HEAD state. '
- 'Please checkout a branch.'
- )
-
- # auto increment and set next job id, e.g. build-85
- job._queue = self
- job.branch = self._next_job_id(prefix)
-
- # create tasks' branches
- for task_name, task in job.tasks.items():
- # adding CI's name to the end of the branch in order to use skip
- # patterns on travis and circleci
- task.branch = '{}-{}-{}'.format(job.branch, task.ci, task_name)
- files = task.render_files(**job.params,
- arrow=job.target,
- queue_remote_url=self.remote_url)
- branch = self.create_branch(task.branch, files=files)
- self.create_tag(task.tag, branch.target)
- task.commit = str(branch.target)
-
- # create job's branch with its description
- return self.create_branch(job.branch, files=job.render_files())
-
-
-def get_version(root, **kwargs):
- """
- Parse function for setuptools_scm that ignores tags for non-C++
- subprojects, e.g. apache-arrow-js-XXX tags.
- """
- from setuptools_scm.git import parse as parse_git_version
-
- # query the calculated version based on the git tags
- kwargs['describe_command'] = (
- 'git describe --dirty --tags --long --match "apache-arrow-[0-9].*"'
- )
- version = parse_git_version(root, **kwargs)
-
- # increment the minor version, because there can be patch releases created
- # from maintenance branches where the tags are unreachable from the
- # master's HEAD, so the git command above generates 0.17.0.dev300 even if
- # arrow has a never 0.17.1 patch release
- pattern = r"^(\d+)\.(\d+)\.(\d+)$"
- match = re.match(pattern, str(version.tag))
- major, minor, patch = map(int, match.groups())
-
- # the bumped version number after 0.17.x will be 0.18.0.dev300
- return "{}.{}.{}.dev{}".format(major, minor + 1, patch, version.distance)
-
-
-class Serializable:
-
- @classmethod
- def to_yaml(cls, representer, data):
- tag = '!{}'.format(cls.__name__)
- dct = {k: v for k, v in data.__dict__.items() if not k.startswith('_')}
- return representer.represent_mapping(tag, dct)
-
-
-class Target(Serializable):
- """Describes target repository and revision the builds run against
-
- This serializable data container holding information about arrow's
- git remote, branch, sha and version number as well as some metadata
- (currently only an email address where the notification should be sent).
- """
-
- def __init__(self, head, branch, remote, version, email=None):
- self.head = head
- self.email = email
- self.branch = branch
- self.remote = remote
- self.version = version
- self.no_rc_version = re.sub(r'-rc\d+\Z', '', version)
- # Semantic Versioning 1.0.0: https://semver.org/spec/v1.0.0.html
- #
- # > A pre-release version number MAY be denoted by appending an
- # > arbitrary string immediately following the patch version and a
- # > dash. The string MUST be comprised of only alphanumerics plus
- # > dash [0-9A-Za-z-].
- #
- # Example:
- #
- # '0.16.1.dev10' ->
- # '0.16.1-dev10'
- self.no_rc_semver_version = \
- re.sub(r'\.(dev\d+)\Z', r'-\1', self.no_rc_version)
-
- @classmethod
- def from_repo(cls, repo, head=None, branch=None, remote=None, version=None,
- email=None):
- """Initialize from a repository
-
- Optionally override detected remote, branch, head, and/or version.
- """
- assert isinstance(repo, Repo)
-
- if head is None:
- head = str(repo.head.target)
- if branch is None:
- branch = repo.branch.branch_name
- if remote is None:
- remote = repo.remote_url
- if version is None:
- version = get_version(repo.path)
- if email is None:
- email = repo.user_email
-
- return cls(head=head, email=email, branch=branch, remote=remote,
- version=version)
-
-
-class Task(Serializable):
- """Describes a build task and metadata required to render CI templates
-
- A task is represented as a single git commit and branch containing jinja2
- rendered files (currently appveyor.yml or .travis.yml configurations).
-
- A task can't be directly submitted to a queue, must belong to a job.
- Each task's unique identifier is its branch name, which is generated after
- submitting the job to a queue.
- """
-
- def __init__(self, ci, template, artifacts=None, params=None):
- assert ci in {
- 'circle',
- 'travis',
- 'appveyor',
- 'azure',
- 'github',
- 'drone',
- }
- self.ci = ci
- self.template = template
- self.artifacts = artifacts or []
- self.params = params or {}
- self.branch = None # filled after adding to a queue
- self.commit = None # filled after adding to a queue
- self._queue = None # set by the queue object after put or get
- self._status = None # status cache
- self._assets = None # assets cache
-
- def render_files(self, **params):
- from jinja2 import Template, StrictUndefined
- from jinja2.exceptions import TemplateError
-
- path = CWD / self.template
- params = toolz.merge(self.params, params)
- template = Template(path.read_text(), undefined=StrictUndefined)
- try:
- rendered = template.render(task=self, **params)
- except TemplateError as e:
- raise RuntimeError(
- 'Failed to render template `{}` with {}: {}'.format(
- path, e.__class__.__name__, str(e)
- )
- )
-
- tree = toolz.merge(_default_tree, {self.filename: rendered})
- return unflatten_tree(tree)
-
- @property
- def tag(self):
- return self.branch
-
- @property
- def filename(self):
- config_files = {
- 'circle': '.circleci/config.yml',
- 'travis': '.travis.yml',
- 'appveyor': 'appveyor.yml',
- 'azure': 'azure-pipelines.yml',
- 'github': '.github/workflows/crossbow.yml',
- 'drone': '.drone.yml',
- }
- return config_files[self.ci]
-
- def status(self, force_query=False):
- _status = getattr(self, '_status', None)
- if force_query or _status is None:
- github_commit = self._queue.github_commit(self.commit)
- self._status = TaskStatus(github_commit)
- return self._status
-
- def assets(self, force_query=False):
- _assets = getattr(self, '_assets', None)
- if force_query or _assets is None:
- github_release = self._queue.github_release(self.tag)
- self._assets = TaskAssets(github_release,
- artifact_patterns=self.artifacts)
- return self._assets
-
-
-class TaskStatus:
- """Combine the results from status and checks API to a single state.
-
- Azure pipelines uses checks API which doesn't provide a combined
- interface like status API does, so we need to manually combine
- both the commit statuses and the commit checks coming from
- different API endpoint
-
- Status.state: error, failure, pending or success, default pending
- CheckRun.status: queued, in_progress or completed, default: queued
- CheckRun.conclusion: success, failure, neutral, cancelled, timed_out
- or action_required, only set if
- CheckRun.status == 'completed'
-
- 1. Convert CheckRun's status and conclusion to one of Status.state
- 2. Merge the states based on the following rules:
- - failure if any of the contexts report as error or failure
- - pending if there are no statuses or a context is pending
- - success if the latest status for all contexts is success
- error otherwise.
-
- Parameters
- ----------
- commit : github3.Commit
- Commit to query the combined status for.
-
- Returns
- -------
- TaskStatus(
- combined_state='error|failure|pending|success',
- github_status='original github status object',
- github_check_runs='github checks associated with the commit',
- total_count='number of statuses and checks'
- )
- """
-
- def __init__(self, commit):
- status = commit.status()
- check_runs = list(commit.check_runs())
- states = [s.state for s in status.statuses]
-
- for check in check_runs:
- if check.status == 'completed':
- if check.conclusion in {'success', 'failure'}:
- states.append(check.conclusion)
- elif check.conclusion in {'cancelled', 'timed_out',
- 'action_required'}:
- states.append('error')
- # omit `neutral` conclusion
- else:
- states.append('pending')
-
- # it could be more effective, but the following is more descriptive
- if any(state in {'error', 'failure'} for state in states):
- combined_state = 'failure'
- elif any(state == 'pending' for state in states):
- combined_state = 'pending'
- elif all(state == 'success' for state in states):
- combined_state = 'success'
- else:
- combined_state = 'error'
-
- self.combined_state = combined_state
- self.github_status = status
- self.github_check_runs = check_runs
- self.total_count = len(states)
-
-
-class TaskAssets(dict):
-
- def __init__(self, github_release, artifact_patterns):
- # HACK(kszucs): don't expect uploaded assets of no atifacts were
- # defiened for the tasks in order to spare a bit of github rate limit
- if not artifact_patterns:
- return
-
- if github_release is None:
- github_assets = {} # no assets have been uploaded for the task
- else:
- github_assets = {a.name: a for a in github_release.assets()}
-
- for pattern in artifact_patterns:
- # artifact can be a regex pattern
- compiled = re.compile(pattern)
- matches = list(
- filter(None, map(compiled.match, github_assets.keys()))
- )
- num_matches = len(matches)
-
- # validate artifact pattern matches single asset
- if num_matches == 0:
- self[pattern] = None
- elif num_matches == 1:
- self[pattern] = github_assets[matches[0].group(0)]
- else:
- raise ValueError(
- 'Only a single asset should match pattern `{}`, there are '
- 'multiple ones: {}'.format(pattern, ', '.join(matches))
- )
-
- def missing_patterns(self):
- return [pattern for pattern, asset in self.items() if asset is None]
-
- def uploaded_assets(self):
- return [asset for asset in self.values() if asset is not None]
-
-
-class Job(Serializable):
- """Describes multiple tasks against a single target repository"""
-
- def __init__(self, target, tasks, params=None):
- if not tasks:
- raise ValueError('no tasks were provided for the job')
- if not all(isinstance(task, Task) for task in tasks.values()):
- raise ValueError('each `tasks` mus be an instance of Task')
- if not isinstance(target, Target):
- raise ValueError('`target` must be an instance of Target')
- if not isinstance(target, Target):
- raise ValueError('`target` must be an instance of Target')
- if not isinstance(params, dict):
- raise ValueError('`params` must be an instance of dict')
-
- self.target = target
- self.tasks = tasks
- self.params = params or {} # additional parameters for the tasks
- self.branch = None # filled after adding to a queue
- self._queue = None # set by the queue object after put or get
-
- def render_files(self):
- with StringIO() as buf:
- yaml.dump(self, buf)
- content = buf.getvalue()
- tree = toolz.merge(_default_tree, {'job.yml': content})
- return unflatten_tree(tree)
-
- @property
- def queue(self):
- assert isinstance(self._queue, Queue)
- return self._queue
-
- @queue.setter
- def queue(self, queue):
- assert isinstance(queue, Queue)
- self._queue = queue
- for task in self.tasks.values():
- task._queue = queue
-
- @property
- def email(self):
- return os.environ.get('CROSSBOW_EMAIL', self.target.email)
-
- @property
- def date(self):
- return self.queue.date_of(self)
-
- @classmethod
- def from_config(cls, config, target, tasks=None, groups=None, params=None):
- """
- Intantiate a job from based on a config.
-
- Parameters
- ----------
- config : dict
- Deserialized content of tasks.yml
- target : Target
- Describes target repository and revision the builds run against.
- tasks : Optional[List[str]], default None
- List of glob patterns for matching task names.
- groups : Optional[List[str]], default None
- List of exact group names matching predefined task sets in the
- config.
- params : Optional[Dict[str, str]], default None
- Additional rendering parameters for the task templates.
-
- Returns
- -------
- Job
-
- Raises
- ------
- click.ClickException
- If invalid groups or tasks has been passed.
- """
- task_definitions = config.select(tasks, groups=groups)
-
- # instantiate the tasks
- tasks = {}
- versions = {'version': target.version,
- 'no_rc_version': target.no_rc_version,
- 'no_rc_semver_version': target.no_rc_semver_version}
- for task_name, task in task_definitions.items():
- artifacts = task.pop('artifacts', None) or [] # because of yaml
- artifacts = [fn.format(**versions) for fn in artifacts]
- tasks[task_name] = Task(artifacts=artifacts, **task)
-
- return cls(target=target, tasks=tasks, params=params)
-
- def is_finished(self):
- for task in self.tasks.values():
- status = task.status(force_query=True)
- if status.combined_state == 'pending':
- return False
- return True
-
- def wait_until_finished(self, poll_max_minutes=120,
- poll_interval_minutes=10):
- started_at = time.time()
- while True:
- if self.is_finished():
- break
-
- waited_for_minutes = (time.time() - started_at) / 60
- if waited_for_minutes > poll_max_minutes:
- msg = ('Exceeded the maximum amount of time waiting for job '
- 'to finish, waited for {} minutes.')
- raise RuntimeError(msg.format(waited_for_minutes))
-
- # TODO(kszucs): use logging
- click.echo('Waiting {} minutes and then checking again'
- .format(poll_interval_minutes))
- time.sleep(poll_interval_minutes * 60)
-
-
-class Config(dict):
-
- @classmethod
- def load_yaml(cls, path):
- with Path(path).open() as fp:
- return cls(yaml.load(fp))
-
- def select(self, tasks=None, groups=None):
- config_groups = dict(self['groups'])
- config_tasks = dict(self['tasks'])
- valid_groups = set(config_groups.keys())
- valid_tasks = set(config_tasks.keys())
- group_whitelist = list(groups or [])
- task_whitelist = list(tasks or [])
-
- # validate that the passed groups are defined in the config
- requested_groups = set(group_whitelist)
- invalid_groups = requested_groups - valid_groups
- if invalid_groups:
- msg = 'Invalid group(s) {!r}. Must be one of {!r}'.format(
- invalid_groups, valid_groups
- )
- raise ValueError(msg)
-
- # merge the tasks defined in the selected groups
- task_patterns = [list(config_groups[name]) for name in group_whitelist]
- task_patterns = set(sum(task_patterns, task_whitelist))
-
- # treat the task names as glob patterns to select tasks more easily
- requested_tasks = set(
- toolz.concat(
- fnmatch.filter(valid_tasks, p) for p in task_patterns
- )
- )
-
- # validate that the passed and matched tasks are defined in the config
- invalid_tasks = requested_tasks - valid_tasks
- if invalid_tasks:
- msg = 'Invalid task(s) {!r}. Must be one of {!r}'.format(
- invalid_tasks, valid_tasks
- )
- raise ValueError(msg)
-
- return {
- task_name: config_tasks[task_name] for task_name in requested_tasks
- }
-
- def validate(self):
- # validate that the task groups are properly referening the tasks
- for group_name, group in self['groups'].items():
- for pattern in group:
- tasks = self.select(tasks=[pattern])
- if not tasks:
- raise ValueError(
- "The pattern `{}` defined for task group `{}` is not "
- "matching any of the tasks defined in the "
- "configuration file.".format(pattern, group_name)
- )
-
- # validate that the tasks are constructible
- for task_name, task in self['tasks'].items():
- try:
- Task(**task)
- except Exception as e:
- raise ValueError(
- 'Unable to construct a task object from the '
- 'definition of task `{}`. The original error message '
- 'is: `{}`'.format(task_name, str(e))
- )
-
- # validate that the defined tasks are renderable, in order to to that
- # define the required object with dummy data
- target = Target(
- head='e279a7e06e61c14868ca7d71dea795420aea6539',
- branch='master',
- remote='https://github.com/apache/arrow',
- version='1.0.0dev123',
- email='dummy@example.ltd'
- )
-
- for task_name, task in self['tasks'].items():
- task = Task(**task)
- files = task.render_files(
- arrow=target,
- queue_remote_url='https://github.com/org/crossbow'
- )
- if not files:
- raise ValueError('No files have been rendered for task `{}`'
- .format(task_name))
-
-
-class Report:
-
- def __init__(self, job):
- self.job = job
-
- def show(self):
- raise NotImplementedError()
-
-
-class ConsoleReport(Report):
- """Report the status of a Job to the console using click"""
-
- # output table's header template
- HEADER = '[{state:>7}] {branch:<52} {content:>16}'
- DETAILS = ' └ {url}'
-
- # output table's row template for assets
- ARTIFACT_NAME = '{artifact:>69} '
- ARTIFACT_STATE = '[{state:>7}]'
-
- # state color mapping to highlight console output
- COLORS = {
- # from CombinedStatus
- 'error': 'red',
- 'failure': 'red',
- 'pending': 'yellow',
- 'success': 'green',
- # custom state messages
- 'ok': 'green',
- 'missing': 'red'
- }
-
- def lead(self, state, branch, n_uploaded, n_expected):
- line = self.HEADER.format(
- state=state.upper(),
- branch=branch,
- content='uploaded {} / {}'.format(n_uploaded, n_expected)
- )
- return click.style(line, fg=self.COLORS[state.lower()])
-
- def header(self):
- header = self.HEADER.format(
- state='state',
- branch='Task / Branch',
- content='Artifacts'
- )
- delimiter = '-' * len(header)
- return '{}\n{}'.format(header, delimiter)
-
- def artifact(self, state, pattern, asset):
- if asset is None:
- artifact = pattern
- state = 'pending' if state == 'pending' else 'missing'
- else:
- artifact = asset.name
- state = 'ok'
-
- name_ = self.ARTIFACT_NAME.format(artifact=artifact)
- state_ = click.style(
- self.ARTIFACT_STATE.format(state=state.upper()),
- self.COLORS[state]
- )
- return name_ + state_
-
- def show(self, outstream, asset_callback=None):
- echo = partial(click.echo, file=outstream)
-
- # write table's header
- echo(self.header())
-
- # write table's body
- for task_name, task in sorted(self.job.tasks.items()):
- # write summary of the uploaded vs total assets
- status = task.status()
- assets = task.assets()
-
- # mapping of artifact pattern to asset or None of not uploaded
- n_expected = len(task.artifacts)
- n_uploaded = len(assets.uploaded_assets())
- echo(self.lead(status.combined_state, task_name, n_uploaded,
- n_expected))
-
- # show link to the actual build, some of the CI providers implement
- # the statuses API others implement the checks API, so display both
- for s in status.github_status.statuses:
- echo(self.DETAILS.format(url=s.target_url))
- for c in status.github_check_runs:
- echo(self.DETAILS.format(url=c.html_url))
-
- # write per asset status
- for artifact_pattern, asset in assets.items():
- if asset_callback is not None:
- asset_callback(task_name, task, asset)
- echo(self.artifact(status.combined_state, artifact_pattern,
- asset))
-
-
-class EmailReport(Report):
-
- HEADER = textwrap.dedent("""
- Arrow Build Report for Job {job_name}
-
- All tasks: {all_tasks_url}
- """)
-
- TASK = textwrap.dedent("""
- - {name}:
- URL: {url}
- """).strip()
-
- EMAIL = textwrap.dedent("""
- From: {sender_name} <{sender_email}>
- To: {recipient_email}
- Subject: {subject}
-
- {body}
- """).strip()
-
- STATUS_HEADERS = {
- # from CombinedStatus
- 'error': 'Errored Tasks:',
- 'failure': 'Failed Tasks:',
- 'pending': 'Pending Tasks:',
- 'success': 'Succeeded Tasks:',
- }
-
- def __init__(self, job, sender_name, sender_email, recipient_email):
- self.sender_name = sender_name
- self.sender_email = sender_email
- self.recipient_email = recipient_email
- super().__init__(job)
-
- def url(self, query):
- repo_url = self.job.queue.remote_url.strip('.git')
- return '{}/branches/all?query={}'.format(repo_url, query)
-
- def listing(self, tasks):
- return '\n'.join(
- sorted(
- self.TASK.format(name=task_name, url=self.url(task.branch))
- for task_name, task in tasks.items()
- )
- )
-
- def header(self):
- url = self.url(self.job.branch)
- return self.HEADER.format(job_name=self.job.branch, all_tasks_url=url)
-
- def subject(self):
- return (
- "[NIGHTLY] Arrow Build Report for Job {}".format(self.job.branch)
- )
-
- def body(self):
- buffer = StringIO()
- buffer.write(self.header())
-
- tasks_by_state = toolz.groupby(
- lambda name_task_pair: name_task_pair[1].status().combined_state,
- self.job.tasks.items()
- )
-
- for state in ('failure', 'error', 'pending', 'success'):
- if state in tasks_by_state:
- tasks = dict(tasks_by_state[state])
- buffer.write('\n')
- buffer.write(self.STATUS_HEADERS[state])
- buffer.write('\n')
- buffer.write(self.listing(tasks))
- buffer.write('\n')
-
- return buffer.getvalue()
-
- def email(self):
- return self.EMAIL.format(
- sender_name=self.sender_name,
- sender_email=self.sender_email,
- recipient_email=self.recipient_email,
- subject=self.subject(),
- body=self.body()
- )
-
- def show(self, outstream):
- outstream.write(self.email())
-
- def send(self, smtp_user, smtp_password, smtp_server, smtp_port):
- import smtplib
-
- email = self.email()
-
- server = smtplib.SMTP_SSL(smtp_server, smtp_port)
- server.ehlo()
- server.login(smtp_user, smtp_password)
- server.sendmail(smtp_user, self.recipient_email, email)
- server.close()
-
-
-class GithubPage:
-
- def __init__(self, jobs):
- self.jobs = list(jobs)
-
- def _generate_page(self, links):
- links = ['<li><a href="{}">{}</a></li>'.format(url, name)
- for name, url in sorted(links.items())]
- return '<html><body><ul>{}</ul></body></html>'.format(''.join(links))
-
- def _generate_toc(self, files):
- result, links = {}, {}
- for k, v in files.items():
- if isinstance(v, dict):
- result[k] = self._generate_toc(v)
- links[k] = '{}/'.format(k)
- else:
- result[k] = v
-
- if links:
- result['index.html'] = self._generate_page(links)
-
- return result
-
- def _is_failed(self, status, task_name):
- # for showing task statuses during the rendering procedure
- if status.combined_state == 'success':
- msg = click.style('[ OK] {}'.format(task_name), fg='green')
- failed = False
- else:
- msg = click.style('[FAIL] {}'.format(task_name), fg='yellow')
- failed = True
-
- click.echo(msg)
- return failed
-
- def render_nightlies(self):
- click.echo('\n\nRENDERING NIGHTLIES')
- nightly_files = {}
-
- for job in self.jobs:
- click.echo('\nJOB: {}'.format(job.branch))
- job_files = {}
-
- for task_name, task in sorted(job.tasks.items()):
- # TODO: also render check runs?
- status = task.status()
-
- task_files = {'status.json': status.github_status.as_json()}
- links = {'status.json': 'status.json'}
-
- if not self._is_failed(status, task_name):
- # accumulate links to uploaded assets
- for asset in task.assets().uploaded_assets():
- links[asset.name] = asset.browser_download_url
-
- if links:
- page_content = self._generate_page(links)
- task_files['index.html'] = page_content
-
- job_files[task_name] = task_files
-
- nightly_files[str(job.date)] = job_files
-
- # write the most recent wheels under the latest directory
- if 'latest' not in nightly_files:
- nightly_files['latest'] = job_files
-
- return nightly_files
-
- def render_pypi_simple(self):
- click.echo('\n\nRENDERING PYPI')
-
- wheels = {}
- for job in self.jobs:
- click.echo('\nJOB: {}'.format(job.branch))
-
- for task_name, task in sorted(job.tasks.items()):
- if not task_name.startswith('wheel'):
- continue
- status = task.status()
- if self._is_failed(status, task_name):
- continue
- for asset in task.assets().uploaded_assets():
- wheels[asset.name] = asset.browser_download_url
-
- return {'pyarrow': {'index.html': self._generate_page(wheels)}}
-
- def render(self):
- # directory structure for the github pages, only wheels are supported
- # at the moment
- files = self._generate_toc({
- 'nightly': self.render_nightlies(),
- 'pypi': self.render_pypi_simple(),
- })
- files['.nojekyll'] = ''
- return files
-
-
-# configure yaml serializer
-yaml = YAML()
-yaml.register_class(Job)
-yaml.register_class(Task)
-yaml.register_class(Target)
-
-
-# define default paths
-DEFAULT_CONFIG_PATH = str(CWD / 'tasks.yml')
-DEFAULT_ARROW_PATH = CWD.parents[1]
-DEFAULT_QUEUE_PATH = CWD.parents[2] / 'crossbow'
-
-
-@click.group()
-@click.option('--github-token', '-t', default=None,
- help='OAuth token for GitHub authentication')
-@click.option('--arrow-path', '-a',
- type=click.Path(), default=str(DEFAULT_ARROW_PATH),
- help='Arrow\'s repository path. Defaults to the repository of '
- 'this script')
-@click.option('--queue-path', '-q',
- type=click.Path(), default=str(DEFAULT_QUEUE_PATH),
- help='The repository path used for scheduling the tasks. '
- 'Defaults to crossbow directory placed next to arrow')
-@click.option('--queue-remote', '-qr', default=None,
- help='Force to use this remote URL for the Queue repository')
-@click.option('--output-file', metavar='<output>',
- type=click.File('w', encoding='utf8'), default='-',
- help='Capture output result into file.')
-@click.pass_context
-def crossbow(ctx, github_token, arrow_path, queue_path, queue_remote,
- output_file):
- ctx.ensure_object(dict)
- ctx.obj['output'] = output_file
- ctx.obj['arrow'] = Repo(arrow_path)
- ctx.obj['queue'] = Queue(queue_path, remote_url=queue_remote,
- github_token=github_token, require_https=True)
-
-
-@crossbow.command()
-@click.option('--config-path', '-c',
- type=click.Path(exists=True), default=DEFAULT_CONFIG_PATH,
- help='Task configuration yml. Defaults to tasks.yml')
-def check_config(config_path):
- # load available tasks configuration and groups from yaml
- config = Config.load_yaml(config_path)
- config.validate()
-
-
-@crossbow.command()
-@click.argument('tasks', nargs=-1, required=False)
-@click.option('--group', '-g', 'groups', multiple=True,
- help='Submit task groups as defined in task.yml')
-@click.option('--param', '-p', 'params', multiple=True,
- help='Additional task parameters for rendering the CI templates')
-@click.option('--job-prefix', default='build',
- help='Arbitrary prefix for branch names, e.g. nightly')
-@click.option('--config-path', '-c',
- type=click.Path(exists=True), default=DEFAULT_CONFIG_PATH,
- help='Task configuration yml. Defaults to tasks.yml')
-@click.option('--arrow-version', '-v', default=None,
- help='Set target version explicitly.')
-@click.option('--arrow-remote', '-r', default=None,
- help='Set GitHub remote explicitly, which is going to be cloned '
- 'on the CI services. Note, that no validation happens '
- 'locally. Examples: https://github.com/apache/arrow or '
- 'https://github.com/kszucs/arrow.')
-@click.option('--arrow-branch', '-b', default=None,
- help='Give the branch name explicitly, e.g. master, ARROW-1949.')
-@click.option('--arrow-sha', '-t', default=None,
- help='Set commit SHA or Tag name explicitly, e.g. f67a515, '
- 'apache-arrow-0.11.1.')
-@click.option('--fetch/--no-fetch', default=True,
- help='Fetch references (branches and tags) from the remote')
-@click.option('--dry-run/--push', default=False,
- help='Just display the rendered CI configurations without '
- 'submitting them')
-@click.pass_obj
-def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version,
- arrow_remote, arrow_branch, arrow_sha, fetch, dry_run):
- output = obj['output']
- queue, arrow = obj['queue'], obj['arrow']
-
- # load available tasks configuration and groups from yaml
- config = Config.load_yaml(config_path)
- config.validate()
-
- # Override the detected repo url / remote, branch and sha - this aims to
- # make release procedure a bit simpler.
- # Note, that the target resivion's crossbow templates must be
- # compatible with the locally checked out version of crossbow (which is
- # in case of the release procedure), because the templates still
- # contain some business logic (dependency installation, deployments)
- # which will be reduced to a single command in the future.
- target = Target.from_repo(arrow, remote=arrow_remote, branch=arrow_branch,
- head=arrow_sha, version=arrow_version)
-
- # parse additional job parameters
- params = dict([p.split("=") for p in params])
-
- # instantiate the job object
- job = Job.from_config(config=config, target=target, tasks=tasks,
- groups=groups, params=params)
-
- if dry_run:
- yaml.dump(job, output)
- else:
- if fetch:
- queue.fetch()
- queue.put(job, prefix=job_prefix)
- queue.push()
- yaml.dump(job, output)
- click.echo('Pushed job identifier is: `{}`'.format(job.branch))
-
-
-@crossbow.command()
-@click.argument('job-name', required=True)
-@click.pass_obj
-def status(obj, job_name):
- output = obj['output']
- queue = obj['queue']
- queue.fetch()
-
- job = queue.get(job_name)
- ConsoleReport(job).show(output)
-
-
-@crossbow.command()
-@click.argument('prefix', required=True)
-@click.pass_obj
-def latest_prefix(obj, prefix):
- queue = obj['queue']
- queue.fetch()
-
- latest = queue.latest_for_prefix(prefix)
- click.echo(latest.branch)
-
-
-@crossbow.command()
-@click.argument('job-name', required=True)
-@click.option('--sender-name', '-n',
- help='Name to use for report e-mail.')
-@click.option('--sender-email', '-e',
- help='E-mail to use for report e-mail.')
-@click.option('--recipient-email', '-r',
- help='Where to send the e-mail report')
-@click.option('--smtp-user', '-u',
- help='E-mail address to use for SMTP login')
-@click.option('--smtp-password', '-P',
- help='SMTP password to use for report e-mail.')
-@click.option('--smtp-server', '-s', default='smtp.gmail.com',
- help='SMTP server to use for report e-mail.')
-@click.option('--smtp-port', '-p', default=465,
- help='SMTP port to use for report e-mail.')
-@click.option('--poll/--no-poll', default=False,
- help='Wait for completion if there are tasks pending')
-@click.option('--poll-max-minutes', default=180,
- help='Maximum amount of time waiting for job completion')
-@click.option('--poll-interval-minutes', default=10,
- help='Number of minutes to wait to check job status again')
-@click.option('--send/--dry-run', default=False,
- help='Just display the report, don\'t send it')
-@click.pass_obj
-def report(obj, job_name, sender_name, sender_email, recipient_email,
- smtp_user, smtp_password, smtp_server, smtp_port, poll,
- poll_max_minutes, poll_interval_minutes, send):
- """
- Send an e-mail report showing success/failure of tasks in a Crossbow run
- """
- output = obj['output']
- queue = obj['queue']
- queue.fetch()
-
- job = queue.get(job_name)
- report = EmailReport(
- job=job,
- sender_name=sender_name,
- sender_email=sender_email,
- recipient_email=recipient_email
- )
-
- if poll:
- job.wait_until_finished(
- poll_max_minutes=poll_max_minutes,
- poll_interval_minutes=poll_interval_minutes
- )
-
- if send:
- report.send(
- smtp_user=smtp_user,
- smtp_password=smtp_password,
- smtp_server=smtp_server,
- smtp_port=smtp_port
- )
- else:
- report.show(output)
-
-
-@crossbow.group()
-@click.pass_context
-def github_page(ctx):
- # currently We only list links to nightly binary wheels
- pass
-
-
-@github_page.command('generate')
-@click.option('-n', default=10,
- help='Number of most recent jobs')
-@click.option('--gh-branch', default='gh-pages',
- help='Github pages branch')
-@click.option('--job-prefix', default='nightly',
- help='Job/tag prefix the wheel links should be generated for')
-@click.option('--dry-run/--push', default=False,
- help='Just render the files without pushing')
-@click.option('--github-push-token', '-t', default=None,
- help='OAuth token for GitHub authentication only used for '
- 'pushing to the crossbow repository, the API requests '
- 'will consume the token passed to the top level crossbow '
- 'command.')
-@click.pass_context
-def generate_github_page(ctx, n, gh_branch, job_prefix, dry_run,
- github_push_token):
- queue = ctx.obj['queue']
- queue.fetch()
-
- # fail early if the requested branch is not available in the local checkout
- remote = 'origin'
- branch = queue.repo.branches['{}/{}'.format(remote, gh_branch)]
- head = queue.repo[branch.target]
-
- # $ at the end of the pattern is important because we're only looking for
- # branches belonging to jobs not branches belonging to tasks
- # the branches we're looking for are like 2020-01-01-0
- jobs = queue.jobs(pattern=r"^nightly-(\d{4})-(\d{2})-(\d{2})-(\d+)$")
- page = GithubPage(toolz.take(n, jobs))
- files = page.render()
- files.update(unflatten_tree(_default_tree))
-
- if dry_run:
- click.echo(files)
- return
-
- refname = 'refs/heads/{}'.format(gh_branch)
- message = 'Update nightly wheel links {}'.format(date.today())
- commit = queue.create_commit(files, parents=[head.id], message=message,
- reference_name=refname)
- click.echo('Updated `{}` branch\'s head to `{}`'
- .format(gh_branch, commit.id))
- queue.push([refname], github_token=github_push_token)
-
-
-@crossbow.command()
-@click.argument('job-name', required=True)
-@click.option('-t', '--target-dir',
- default=str(DEFAULT_ARROW_PATH / 'packages'),
- type=click.Path(file_okay=False, dir_okay=True),
- help='Directory to download the build artifacts')
-@click.pass_obj
-def download_artifacts(obj, job_name, target_dir):
- """Download build artifacts from GitHub releases"""
- output = obj['output']
-
- # fetch the queue repository
- queue = obj['queue']
- queue.fetch()
-
- # query the job's artifacts
- job = queue.get(job_name)
-
- # create directory to download the assets to
- target_dir = Path(target_dir).absolute() / job_name
- target_dir.mkdir(parents=True, exist_ok=True)
-
- # download the assets while showing the job status
- def asset_callback(task_name, task, asset):
- if asset is not None:
- path = target_dir / task_name / asset.name
- path.parent.mkdir(exist_ok=True)
- asset.download(path)
-
- click.echo('Downloading {}\'s artifacts.'.format(job_name))
- click.echo('Destination directory is {}'.format(target_dir))
- click.echo()
-
- report = ConsoleReport(job)
- report.show(output, asset_callback=asset_callback)
-
-
-@crossbow.command()
-@click.option('--sha', required=True, help='Target committish')
-@click.option('--tag', required=True, help='Target tag')
-@click.option('--method', default='curl', help='Use cURL to upload')
-@click.option('--pattern', '-p', 'patterns', required=True, multiple=True,
- help='File pattern to upload as assets')
-@click.pass_obj
-def upload_artifacts(obj, tag, sha, patterns, method):
- queue = obj['queue']
- queue.github_overwrite_release_assets(
- tag_name=tag, target_commitish=sha, method=method, patterns=patterns
- )
-
-
-if __name__ == '__main__':
- crossbow(obj={}, auto_envvar_prefix='CROSSBOW')
diff --git a/dev/tasks/docker-tests/azure.linux.yml b/dev/tasks/docker-tests/azure.linux.yml
index f136947..c3706be 100644
--- a/dev/tasks/docker-tests/azure.linux.yml
+++ b/dev/tasks/docker-tests/azure.linux.yml
@@ -20,12 +20,12 @@
pool:
vmImage: ubuntu-16.04
timeoutInMinutes: 360
- {%- if env is defined %}
+ {% if env is defined %}
variables:
- {%- for key, value in env.items() %}
+ {% for key, value in env.items() %}
{{ key }}: {{ value }}
- {%- endfor %}
- {%- endif %}
+ {% endfor %}
+ {% endif %}
steps:
- task: DockerInstaller@0
diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml
index c5851ed..255c9ac 100644
--- a/dev/tasks/docker-tests/github.linux.yml
+++ b/dev/tasks/docker-tests/github.linux.yml
@@ -15,42 +15,28 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
test:
name: Docker Test
runs-on: ubuntu-latest
steps:
- - name: Checkout Arrow
- shell: bash
- run: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+
- name: Free Up Disk Space
shell: bash
run: arrow/ci/scripts/util_cleanup.sh
- - name: Setup Python
- uses: actions/setup-python@v1
- with:
- python-version: 3.9
- - name: Setup Archery
- run: pip install -e arrow/dev/archery[docker]
+
- name: Execute Docker Build
shell: bash
- {%- if env is defined %}
+ {% if env is defined %}
env:
- {%- for key, value in env.items() %}
+ {% for key, value in env.items() %}
{{ key }}: {{ value }}
- {%- endfor %}
- {%- endif %}
+ {% endfor %}
+ {% endif %}
run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ run }}
diff --git a/dev/tasks/gandiva-jars/github.linux.yml b/dev/tasks/gandiva-jars/github.linux.yml
index 79d2ccc..eb16418 100644
--- a/dev/tasks/gandiva-jars/github.linux.yml
+++ b/dev/tasks/gandiva-jars/github.linux.yml
@@ -15,14 +15,9 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
package:
@@ -48,21 +43,5 @@
env:
OS_NAME: "linux"
CHECK_SHARED_DEPENDENCIES: true
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: 3.8
- - name: Set up Crossbow
- run: |
- pip install --requirement arrow/dev/tasks/requirements-crossbow.txt
- - name: Upload artifacts
- run: |
- python arrow/dev/tasks/crossbow.py \
- --queue-path arrow \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --sha {{ task.branch }} \
- --tag {{ task.tag }} \
- --pattern "arrow/dist/*.jar"
- env:
- CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
+
+ {{ macros.github_upload_releases("arrow/dist/*.jar")|indent }}
diff --git a/dev/tasks/gandiva-jars/github.osx.yml b/dev/tasks/gandiva-jars/github.osx.yml
index 158c22d..3dd6fe4 100644
--- a/dev/tasks/gandiva-jars/github.osx.yml
+++ b/dev/tasks/gandiva-jars/github.osx.yml
@@ -15,14 +15,9 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
package:
@@ -47,26 +42,5 @@
OS_NAME: "osx"
CHECK_SHARED_DEPENDENCIES: true
MACOSX_DEPLOYMENT_TARGET: "10.11"
- - name: Set up Crossbow
- run: |
- brew install libgit2
- pip3 install \
- click \
- github3.py \
- jinja2 \
- jira \
- pygit2 \
- ruamel.yaml \
- setuptools_scm \
- toolz
- - name: Upload artifacts
- run: |
- python3 arrow/dev/tasks/crossbow.py \
- --queue-path arrow \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --sha {{ task.branch }} \
- --tag {{ task.tag }} \
- --pattern "arrow/dist/*.jar"
- env:
- CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
\ No newline at end of file
+
+ {{ macros.github_upload_releases("arrow/dist/*.jar")|indent }}
diff --git a/dev/tasks/linux-packages/github.linux.amd64.yml b/dev/tasks/linux-packages/github.linux.amd64.yml
index 1335a97..380f025 100644
--- a/dev/tasks/linux-packages/github.linux.amd64.yml
+++ b/dev/tasks/linux-packages/github.linux.amd64.yml
@@ -15,28 +15,20 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
package:
name: Package
runs-on: ubuntu-18.04
steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_login_dockerhub()|indent }}
+
- name: Set up Ruby
uses: actions/setup-ruby@v1
- - name: Checkout Arrow
- run: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- name: Free Up Disk Space
shell: bash
run: arrow/ci/scripts/util_cleanup.sh
@@ -65,8 +57,6 @@
shell: bash
run: |
pushd arrow/dev/tasks/linux-packages
- docker login -u "{{ '${{ secrets.DOCKERHUB_USER }}' }}" \
- -p "{{ '${{ secrets.DOCKERHUB_TOKEN }}' }}"
rake docker:push
popd
env:
@@ -103,28 +93,6 @@
APT_TARGETS: {{ target }}
ARROW_VERSION: {{ arrow.version }}
YUM_TARGETS: {{ target }}
- # Using GitHub release tries to find a common ancestor between the
- # currently pushed tag and the latest tag of the GitHub repository
- # (don't know why).
- # The tag upload took 43 minutes because of this scan, so use an
- # alternative upload script.
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: 3.8
- - name: Set up Crossbow
- run: |
- pip install --requirement arrow/dev/tasks/requirements-crossbow.txt
- - name: Upload artifacts
- run: |
- python arrow/dev/tasks/crossbow.py \
- --queue-path . \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- {%- for extension in upload_extensions %}
- --pattern "arrow/dev/tasks/linux-packages/**/*{{ extension }}" \
- {%- endfor %}
- --sha {{ task.branch }} \
- --tag {{ task.tag }}
- env:
- CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
+
+ {% set patterns = upload_extensions | format_all("arrow/dev/tasks/linux-packages/**/*{}") %}
+ {{ macros.github_upload_releases(patterns)|indent }}
diff --git a/dev/tasks/linux-packages/travis.linux.arm64.yml b/dev/tasks/linux-packages/travis.linux.arm64.yml
index 0b83520..e9457d6 100644
--- a/dev/tasks/linux-packages/travis.linux.arm64.yml
+++ b/dev/tasks/linux-packages/travis.linux.arm64.yml
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+{% import 'macros.jinja' as macros with context %}
+
arch: arm64-graviton2
virt: vm
os: linux
@@ -66,9 +68,8 @@
- YUM_TARGETS={{ target }}
before_script:
- - git clone --no-checkout {{ arrow.remote }} arrow
- - git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- - git -C arrow checkout FETCH_HEAD
+ {{ macros.travis_checkout_arrow() }}
+ {{ macros.travis_docker_login() }}
# Build createrepo_c from source.
# We can remove them when we can install createrepo_c package
@@ -143,15 +144,5 @@
- popd
after_success:
- - sudo -H pip3 install --upgrade pip
- - sudo -H pip3 install -r arrow/dev/tasks/requirements-crossbow.txt pygit2==1.0
- - |
- python3 arrow/dev/tasks/crossbow.py \
- --queue-path . \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- {%- for extension in upload_extensions %}
- --pattern "arrow/dev/tasks/linux-packages/**/*{{ extension }}" \
- {%- endfor %}
- --sha {{ task.branch }} \
- --tag {{ task.tag }}
+  {% set patterns = upload_extensions | format_all("arrow/dev/tasks/linux-packages/**/*{}") %}
+  {{ macros.travis_upload_releases(patterns) }}
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
new file mode 100644
index 0000000..e0552b1
--- /dev/null
+++ b/dev/tasks/macros.jinja
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{%- macro github_header() -%}
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+on:
+ push:
+ branches:
+ - "*-github-*"
+{% endmacro %}
+
+{%- macro github_checkout_arrow() -%}
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow config core.symlinks true
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Fetch Submodules and Tags
+ shell: bash
+ run: cd arrow && ci/scripts/util_checkout.sh
+{% endmacro %}
+
+{%- macro github_login_dockerhub() -%}
+ - name: Login to Dockerhub
+ uses: docker/login-action@v1
+ with:
+ username: {{ '${{ secrets.DOCKERHUB_USER }}' }}
+ password: {{ '${{ secrets.DOCKERHUB_TOKEN }}' }}
+{% endmacro %}
+
+{%- macro github_login_ghcr() -%}
+ - name: Login to GitHub Container Registry
+ shell: bash
+ run: docker login ghcr.io -u {{ '${{ github.repository_owner }}' }} -p {{ '${{ secrets.CROSSBOW_GHCR_TOKEN }}' }}
+{% endmacro %}
+
+{%- macro github_install_archery() -%}
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.8
+ - name: Install Archery
+ shell: bash
+ run: pip install -e arrow/dev/archery[all]
+{% endmacro %}
+
+{%- macro github_upload_releases(pattern) -%}
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.8
+ - name: Setup Crossbow
+ shell: bash
+ run: pip install -e arrow/dev/archery[crossbow]
+ - name: Upload artifacts
+ shell: bash
+ run: |
+ archery crossbow \
+ --queue-path $(pwd) \
+ --queue-remote {{ queue_remote_url }} \
+ upload-artifacts \
+ --sha {{ task.branch }} \
+ --tag {{ task.tag }} \
+ {% if pattern is string %}
+ --pattern "{{ pattern }}"
+ {% elif pattern is iterable %}
+ {% for p in pattern %}
+ --pattern "{{ p }}" {{ "\\" if not loop.last else "" }}
+ {% endfor %}
+ {% endif %}
+ env:
+ CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
+{% endmacro %}
+
+{%- macro github_upload_gemfury(pattern) -%}
+ {% if arrow.branch == 'master' %}
+ - name: Upload package to Gemfury
+ shell: bash
+ run: |
+      path=$(ls {{ pattern }})
+ curl -F "package=@${path}" https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/
+ env:
+ CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
+ CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }}
+ {% endif %}
+{% endmacro %}
+
+{%- macro azure_checkout_arrow() -%}
+ - script: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ displayName: Clone arrow
+{% endmacro %}
+
+{%- macro azure_upload_releases(pattern) -%}
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '3.8'
+ - script: pip install -e arrow/dev/archery[crossbow]
+ displayName: Install Crossbow
+ - bash: |
+ archery crossbow \
+ --queue-path $(pwd) \
+ --queue-remote {{ queue_remote_url }} \
+ upload-artifacts \
+ --sha {{ task.branch }} \
+ --tag {{ task.tag }} \
+ {% if pattern is string %}
+ --pattern "{{ pattern }}"
+ {% elif pattern is iterable %}
+ {% for p in pattern %}
+      --pattern "{{ p }}" {{ "\\" if not loop.last else "" }}
+ {% endfor %}
+ {% endif %}
+ env:
+ CROSSBOW_GITHUB_TOKEN: $(CROSSBOW_GITHUB_TOKEN)
+ displayName: Upload packages as a GitHub release
+{% endmacro %}
+
+{%- macro azure_upload_anaconda(pattern) -%}
+ {% if arrow.branch == 'master' %}
+ - task: CondaEnvironment@1
+ inputs:
+ packageSpecs: 'anaconda-client'
+ installOptions: '-c conda-forge'
+ updateConda: no
+ - script: |
+ conda install -y anaconda-client
+ anaconda -t $(CROSSBOW_ANACONDA_TOKEN) upload --force {{ pattern }}
+ displayName: Upload packages to Anaconda
+ {% endif %}
+{% endmacro %}
+
+{%- macro travis_checkout_arrow() -%}
+ - git clone --no-checkout {{ arrow.remote }} arrow
+ - git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ - git -C arrow checkout FETCH_HEAD
+ - git -C arrow submodule update --init --recursive
+{% endmacro %}
+
+{%- macro travis_install_archery() -%}
+ - sudo -H pip3 install --upgrade pip
+ - sudo -H pip3 install docker-compose
+ - sudo -H pip3 install -e arrow/dev/archery[docker]
+{% endmacro %}
+
+{%- macro travis_docker_login() -%}
+ - echo "${DOCKERHUB_TOKEN}" | docker login --username "${DOCKERHUB_USER}" --password-stdin
+{% endmacro %}
+
+{%- macro travis_upload_releases(pattern) -%}
+ - sudo -H pip3 install pygit2==1.0
+ - sudo -H pip3 install arrow/dev/archery[crossbow]
+ - |
+    archery crossbow \
+ --queue-path $(pwd) \
+ --queue-remote {{ queue_remote_url }} \
+ upload-artifacts \
+ --sha {{ task.branch }} \
+ --tag {{ task.tag }} \
+ {% if pattern is string %}
+ --pattern "{{ pattern }}"
+ {% elif pattern is iterable %}
+ {% for p in pattern %}
+ --pattern "{{ p }}" {{ "\\" if not loop.last else "" }}
+ {% endfor %}
+ {% endif %}
+{% endmacro %}
+
+{%- macro travis_upload_gemfury(pattern) -%}
+ {% if arrow.branch == 'master' %}
+ - |
+    WHEEL_PATH=$(echo {{ pattern }})
+ curl \
+ -F "package=@${WHEEL_PATH}" \
+ "https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/"
+ {% endif %}
+{% endmacro %}
diff --git a/dev/tasks/nightlies.sample.yml b/dev/tasks/nightlies.sample.yml
index 12b7a09..710f7c0 100644
--- a/dev/tasks/nightlies.sample.yml
+++ b/dev/tasks/nightlies.sample.yml
@@ -52,32 +52,17 @@
conda config --set show_channel_urls true
install:
- - |
- conda install -y \
- jinja2 \
- pygit2 \
- click \
- ruamel.yaml \
- setuptools_scm \
- github3.py \
- python-gnupg
-
-script:
+ - pushd ..
# to build against a specific branch of a fork
# git clone -b <branch> https://github.com/<user>/arrow
- - pushd ..
- - git clone -b master https://github.com/apache/arrow
+ - git clone https://github.com/apache/arrow
+  - pip install arrow/dev/archery[crossbow]
+script:
# submit packaging tasks
- |
if [ $TRAVIS_EVENT_TYPE = "cron" ]; then
- python arrow/dev/tasks/crossbow.py submit \
- -g conda \
- -g wheel \
- -g linux
+ archery crossbow submit -g conda -g wheel -g linux
else
- python arrow/dev/tasks/crossbow.py submit --dry-run \
- -g conda \
- -g wheel \
- -g linux
+ archery crossbow submit --dry-run -g conda -g wheel -g linux
fi
diff --git a/dev/tasks/nuget-packages/github.linux.yml b/dev/tasks/nuget-packages/github.linux.yml
index 6bb5ea0..cd03a7b 100644
--- a/dev/tasks/nuget-packages/github.linux.yml
+++ b/dev/tasks/nuget-packages/github.linux.yml
@@ -15,36 +15,18 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# GitHub comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
-
-env:
- DOCKER_BUILDKIT: 0
- COMPOSE_DOCKER_CLI_BUILD: 1
+{{ macros.github_header() }}
jobs:
package:
name: Package
runs-on: ubuntu-latest
steps:
- - name: Checkout Arrow
- run: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- - name: Setup Python
- uses: actions/setup-python@v2
- with:
- python-version: 3.8
- - name: Setup Archery
- run: pip install -e arrow/dev/archery[docker]
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+
- name: Prepare version
run: |
sed -i'' -E -e \
@@ -55,31 +37,7 @@
pushd arrow
archery docker run {{ run }}
popd
- # Using GitHub release tries to find a common ancestor between the
- # currently pushed tag and the latest tag of the GitHub repository
- # (don't know why).
- # The tag upload took 43 minutes because of this scan, so use an
- # alternative upload script.
- - name: Set up Crossbow
- run: |
- pip install \
- click \
- github3.py \
- jinja2 \
- jira \
- pygit2 \
- ruamel.yaml \
- setuptools_scm \
- toolz
- - name: Upload artifacts
- run: |
- python arrow/dev/tasks/crossbow.py \
- --queue-path . \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --pattern "arrow/csharp/artifacts/**/*.nupkg" \
- --pattern "arrow/csharp/artifacts/**/*.snupkg" \
- --sha {{ task.branch }} \
- --tag {{ task.tag }}
- env:
- CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
+
+ {% set patterns = ["arrow/csharp/artifacts/**/*.nupkg",
+ "arrow/csharp/artifacts/**/*.snupkg"] %}
+ {{ macros.github_upload_releases(patterns)|indent }}
diff --git a/dev/tasks/python-sdist/github.yml b/dev/tasks/python-sdist/github.yml
index b784fbd..6837187 100644
--- a/dev/tasks/python-sdist/github.yml
+++ b/dev/tasks/python-sdist/github.yml
@@ -15,35 +15,18 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
build:
name: "Build sdist"
runs-on: ubuntu-20.04
steps:
- - name: Checkout Arrow
- run: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow config core.symlinks true
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- - name: Fetch Submodules and Tags
- run: cd arrow && ci/scripts/util_checkout.sh
- - name: Free Up Disk Space
- shell: bash
- run: arrow/ci/scripts/util_cleanup.sh
- - uses: actions/setup-python@v2
- - name: Setup Archery
- run: pip install -e arrow/dev/archery[docker]
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+
- name: Build sdist
run: |
archery docker run python-sdist
@@ -52,32 +35,11 @@
{% endif %}
env:
PYARROW_VERSION: {{ arrow.no_rc_version }}
+
- name: Test sdist
run: archery docker run ubuntu-python-sdist-test
env:
PYARROW_VERSION: {{ arrow.no_rc_version }}
- {% if arrow.branch == 'master' %}
- - name: Upload to gemfury
- run: |
- SDIST_PATH=$(echo arrow/python/dist/*.tar.gz)
- curl \
- -F "package=@${SDIST_PATH}" \
- "https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/"
- env:
- CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }}
- CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
- {% endif %}
- - name: Setup Crossbow
- run: pip install --requirement arrow/dev/tasks/requirements-crossbow.txt
- - name: Upload artifacts
- shell: bash
- run: |
- python arrow/dev/tasks/crossbow.py \
- --queue-path . \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --pattern "arrow/python/dist/*.tar.gz" \
- --sha {{ task.branch }} \
- --tag {{ task.tag }}
- env:
- CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
+
+ {{ macros.github_upload_releases("arrow/python/dist/*.tar.gz")|indent }}
+ {{ macros.github_upload_gemfury("arrow/python/dist/*.tar.gz")|indent }}
diff --git a/dev/tasks/python-wheels/github.linux.amd64.yml b/dev/tasks/python-wheels/github.linux.amd64.yml
index 779aa54..a626407 100644
--- a/dev/tasks/python-wheels/github.linux.amd64.yml
+++ b/dev/tasks/python-wheels/github.linux.amd64.yml
@@ -15,14 +15,9 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
build:
@@ -34,38 +29,14 @@
PYTHON: {{ python_version }}
steps:
- ############################ Checkout ###################################
- - name: Checkout Arrow
- shell: bash
- run: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow config core.symlinks true
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- - name: Fetch Submodules and Tags
- shell: bash
- run: cd arrow && ci/scripts/util_checkout.sh
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+ {{ macros.github_login_dockerhub()|indent }}
- ############################ Docker Registry ############################
- - name: Login to GitHub Container Registry
- uses: docker/login-action@v1
- with:
- username: {{ '${{ secrets.DOCKERHUB_USER }}' }}
- password: {{ '${{ secrets.DOCKERHUB_TOKEN }}' }}
-
- ############################ Archery Installation #######################
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: 3.8
- - name: Install Archery and Crossbow dependencies
- run: pip install -e arrow/dev/archery[all]
-
- ############################ Build & Test ###############################
- name: Build wheel
shell: bash
run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-manylinux-{{ manylinux_version }}
+
# TODO(kszucs): auditwheel show
- name: Test wheel
shell: bash
@@ -73,35 +44,5 @@
archery docker run python-wheel-manylinux-test-imports
archery docker run python-wheel-manylinux-test-unittests
- ############################ Artifact Uploading #########################
- - name: Upload artifacts
- shell: bash
- run: |
- python arrow/dev/tasks/crossbow.py \
- --queue-path . \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --pattern "arrow/python/repaired_wheels/*.whl" \
- --sha {{ task.branch }} \
- --tag {{ task.tag }}
- env:
- CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
-
- {% if arrow.branch == 'master' %}
- - name: Push the docker image
- shell: bash
- run: |
- archery docker push python-wheel-manylinux-{{ manylinux_version }}
- archery docker push python-wheel-manylinux-test-unittests
-
- - name: Upload to gemfury
- shell: bash
- run: |
- WHEEL_PATH=$(echo arrow/python/repaired_wheels/*.whl)
- curl \
- -F "package=@${WHEEL_PATH}" \
- "https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/"
- env:
- CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }}
- CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
- {% endif %}
+ {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
+ {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml
index db67d99..af0cc44 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -14,14 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
env:
ARROW_S3: {{ arrow_s3 }}
@@ -41,13 +36,7 @@
name: Build wheel for OS X
runs-on: macos-latest
steps:
- - name: Checkout Arrow
- run: |
- set -ex
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
+ {{ macros.github_checkout_arrow()|indent }}
- name: Install System Dependencies
run: brew install bison ninja
@@ -140,28 +129,5 @@
unset MACOSX_DEPLOYMENT_TARGET
arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
- - name: Setup Crossbow
- run: pip install --requirement arrow/dev/tasks/requirements-crossbow.txt
-
- - name: Upload artifacts
- run: |
- python arrow/dev/tasks/crossbow.py \
- --queue-path $(pwd) \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --sha {{ task.branch }} \
- --tag {{ task.tag }} \
- --pattern "arrow/python/dist/*.whl"
- env:
- CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
-
- {% if arrow.branch == 'master' %}
- - name: Upload package to Gemfury
- run: |
- # upload to gemfury pypi repository, there should be a single wheel
- path=$(ls arrow/python/dist/*.whl)
- curl -F "package=@${path}" https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/
- env:
- CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
- CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }}
- {% endif %}
+ {{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }}
+ {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
diff --git a/dev/tasks/python-wheels/github.windows.yml b/dev/tasks/python-wheels/github.windows.yml
index 2e24f36..922533b 100644
--- a/dev/tasks/python-wheels/github.windows.yml
+++ b/dev/tasks/python-wheels/github.windows.yml
@@ -15,14 +15,9 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
build:
@@ -42,64 +37,17 @@
ARCHERY_USE_DOCKER_CLI: 1
steps:
- ############################ Checkout ###################################
- - name: Checkout Arrow
- shell: bash
- run: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow config core.symlinks true
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- - name: Fetch Submodules and Tags
- shell: bash
- run: cd arrow && ci/scripts/util_checkout.sh
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_login_ghcr()|indent }}
+ {{ macros.github_install_archery()|indent }}
- ############################ Docker Registry ############################
- - name: Login to GitHub Container Registry
- shell: bash
- run: docker login ghcr.io -u {{ '${{ github.repository_owner }}' }} -p {{ '${{ secrets.CROSSBOW_GHCR_TOKEN }}' }}
-
- ############################ Archery Installation #######################
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: 3.8
- - name: Install Archery and Crossbow dependencies
- shell: bash
- run: pip install -e arrow/dev/archery[all]
-
- ############################ Build & Test ###############################
- name: Build wheel
shell: cmd
run: archery docker run --no-build -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2017
+
- name: Test wheel
shell: cmd
run: archery docker run python-wheel-windows-test
- ############################ Artifact Uploading #########################
- - name: Upload artifacts
- shell: bash
- run: |
- python arrow/dev/tasks/crossbow.py \
- --queue-path . \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --pattern "arrow/python/dist/*.whl" \
- --sha {{ task.branch }} \
- --tag {{ task.tag }}
- env:
- CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
-
- {% if arrow.branch == 'master' %}
- - name: Upload to gemfury
- shell: bash
- run: |
- WHEEL_PATH=$(echo arrow/python/dist/*.whl)
- curl.exe \
- -F "package=@${WHEEL_PATH}" \
- "https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/"
- env:
- CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }}
- CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
- {% endif %}
+ {{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }}
+ {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
diff --git a/dev/tasks/python-wheels/travis.linux.arm64.yml b/dev/tasks/python-wheels/travis.linux.arm64.yml
index 01ae487..137ad6b 100644
--- a/dev/tasks/python-wheels/travis.linux.arm64.yml
+++ b/dev/tasks/python-wheels/travis.linux.arm64.yml
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+{% import 'macros.jinja' as macros with context %}
+
arch: arm64-graviton2
virt: vm
os: linux
@@ -43,23 +45,14 @@
- PYTHON={{ python_version }}
before_script:
- - git clone --no-checkout {{ arrow.remote }} arrow
- - git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- - git -C arrow checkout FETCH_HEAD
- - git -C arrow submodule update --init --recursive
+ {{ macros.travis_checkout_arrow() }}
+ {{ macros.travis_docker_login() }}
script:
# Install Archery and Crossbow dependencies
- - sudo -H pip3 install --upgrade pip
- - sudo -H pip3 install docker-compose
- - sudo -H pip3 install -e arrow/dev/archery[docker]
- - sudo -H pip3 install -r arrow/dev/tasks/requirements-crossbow.txt pygit2==1.0
-
- # Docker Login:
- - echo "${DOCKERHUB_TOKEN}" | docker login --username "${DOCKERHUB_USER}" --password-stdin
+ {{ macros.travis_install_archery() }}
# Build and Test packages
-
# output something every minutes to prevent travis from killing the build
- while sleep 1m; do echo "=====[ $SECONDS seconds still running ]====="; done &
- archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-manylinux-{{ manylinux_version }}
@@ -69,23 +62,11 @@
after_success:
# Upload wheel as github artifact
- - |
- python3 arrow/dev/tasks/crossbow.py \
- --queue-path . \
- --queue-remote {{ queue_remote_url }} \
- upload-artifacts \
- --pattern "arrow/python/repaired_wheels/*.whl" \
- --sha {{ task.branch }} \
- --tag {{ task.tag }}
+ {{ macros.travis_upload_releases("arrow/python/repaired_wheels/*.whl") }}
+ {{ macros.travis_upload_gemfury("arrow/python/repaired_wheels/*.whl") }}
{% if arrow.branch == 'master' %}
# Push the docker image to dockerhub
- archery docker push python-wheel-manylinux-{{ manylinux_version }}
- archery docker push python-wheel-manylinux-test-unittests
- # Upload as nightly wheel to gemfury
- - |
- WHEEL_PATH=$(echo arrow/python/repaired_wheels/*.whl)
- curl \
- -F "package=@${WHEEL_PATH}" \
- "https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/"
{% endif %}
diff --git a/dev/tasks/requirements-crossbow.txt b/dev/tasks/requirements-crossbow.txt
deleted file mode 100644
index 2436b4d..0000000
--- a/dev/tasks/requirements-crossbow.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-click>=7.1
-github3.py
-jinja2
-pygit2
-ruamel.yaml
-setuptools_scm
-toolz
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 84114c0..5a04c98 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -40,7 +40,7 @@
linux-arm64:
- debian-*-arm64
- ubuntu-*-arm64
- - centos-*-aarch64
+ - centos-*-arm64
gandiva:
- gandiva-*
@@ -236,7 +236,7 @@
- arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cuda.tar.bz2
- pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cuda.tar.bz2
- conda-linux-gcc-py36-aarch64:
+ conda-linux-gcc-py36-arm64:
ci: drone
template: conda-recipes/drone.yml
params:
@@ -245,7 +245,7 @@
- arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
- pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
- conda-linux-gcc-py37-aarch64:
+ conda-linux-gcc-py37-arm64:
ci: drone
template: conda-recipes/drone.yml
params:
@@ -254,7 +254,7 @@
- arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
- pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
- conda-linux-gcc-py38-aarch64:
+ conda-linux-gcc-py38-arm64:
ci: drone
template: conda-recipes/drone.yml
params:
@@ -263,7 +263,7 @@
- arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
- pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
- conda-linux-gcc-py39-aarch64:
+ conda-linux-gcc-py39-arm64:
ci: drone
template: conda-recipes/drone.yml
params:
@@ -343,239 +343,62 @@
- arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
- pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
- ############################## Wheel Linux ##################################
- # AMD64
+{% for python_version, python_tag, abi_tag in [("3.6", "cp36", "cp36m"),
+ ("3.7", "cp37", "cp37m"),
+ ("3.8", "cp38", "cp38"),
+ ("3.9", "cp39", "cp39")] %}
- wheel-manylinux2010-cp36m-amd64:
- ci: github
- template: python-wheels/github.linux.amd64.yml
+{############################## Wheel Linux ##################################}
+
+{% for ci, arch, arch_alias, manylinux in [("github", "amd64", "x86_64", "2010"),
+ ("github", "amd64", "x86_64", "2014"),
+ ("travis", "arm64", "aarch64", "2014")] %}
+ {% set platform_tag = "manylinux{}_{}".format(manylinux, arch_alias) %}
+
+ wheel-manylinux{{ manylinux }}-{{ python_tag }}-{{ arch }}:
+ ci: {{ ci }}
+ template: python-wheels/{{ ci }}.linux.{{ arch }}.yml
params:
- python_version: 3.6
- manylinux_version: 2010
+ python_version: {{ python_version }}
+ manylinux_version: {{ manylinux }}
artifacts:
- - pyarrow-{no_rc_version}-cp36-cp36m-manylinux2010_x86_64.whl
+ - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-{{ platform_tag }}.whl
- wheel-manylinux2010-cp37m-amd64:
- ci: github
- template: python-wheels/github.linux.amd64.yml
- params:
- python_version: 3.7
- manylinux_version: 2010
- artifacts:
- - pyarrow-{no_rc_version}-cp37-cp37m-manylinux2010_x86_64.whl
+{% endfor %}
- wheel-manylinux2010-cp38-amd64:
- ci: github
- template: python-wheels/github.linux.amd64.yml
- params:
- python_version: 3.8
- manylinux_version: 2010
- artifacts:
- - pyarrow-{no_rc_version}-cp38-cp38-manylinux2010_x86_64.whl
+{############################## Wheel OSX ####################################}
- wheel-manylinux2010-cp39-amd64:
- ci: github
- template: python-wheels/github.linux.amd64.yml
- params:
- python_version: 3.9
- manylinux_version: 2010
- artifacts:
- - pyarrow-{no_rc_version}-cp39-cp39-manylinux2010_x86_64.whl
+# enable S3 support from macOS 10.13 so we don't need to bundle curl, crypt and ssl
+{% for macos_version, macos_codename, arrow_s3 in [("10.9", "mavericks", "OFF"),
+ ("10.13", "high-sierra", "ON")] %}
+  {% set platform_tag = "macosx_{}_x86_64".format(macos_version.replace('.', '_')) %}
- wheel-manylinux2014-cp36m-amd64:
- ci: github
- template: python-wheels/github.linux.amd64.yml
- params:
- python_version: 3.6
- manylinux_version: 2014
- artifacts:
- - pyarrow-{no_rc_version}-cp36-cp36m-manylinux2014_x86_64.whl
-
- wheel-manylinux2014-cp37m-amd64:
- ci: github
- template: python-wheels/github.linux.amd64.yml
- params:
- python_version: 3.7
- manylinux_version: 2014
- artifacts:
- - pyarrow-{no_rc_version}-cp37-cp37m-manylinux2014_x86_64.whl
-
- wheel-manylinux2014-cp38-amd64:
- ci: github
- template: python-wheels/github.linux.amd64.yml
- params:
- python_version: 3.8
- manylinux_version: 2014
- artifacts:
- - pyarrow-{no_rc_version}-cp38-cp38-manylinux2014_x86_64.whl
-
- wheel-manylinux2014-cp39-amd64:
- ci: github
- template: python-wheels/github.linux.amd64.yml
- params:
- python_version: 3.9
- manylinux_version: 2014
- artifacts:
- - pyarrow-{no_rc_version}-cp39-cp39-manylinux2014_x86_64.whl
-
- # ARM64v8
-
- wheel-manylinux2014-cp36m-arm64:
- ci: travis
- template: python-wheels/travis.linux.arm64.yml
- params:
- python_version: 3.6
- manylinux_version: 2014
- artifacts:
- - pyarrow-{no_rc_version}-cp36-cp36m-manylinux2014_aarch64.whl
-
- wheel-manylinux2014-cp37m-arm64:
- ci: travis
- template: python-wheels/travis.linux.arm64.yml
- params:
- python_version: 3.7
- manylinux_version: 2014
- artifacts:
- - pyarrow-{no_rc_version}-cp37-cp37m-manylinux2014_aarch64.whl
-
- wheel-manylinux2014-cp38-arm64:
- ci: travis
- template: python-wheels/travis.linux.arm64.yml
- params:
- python_version: 3.8
- manylinux_version: 2014
- artifacts:
- - pyarrow-{no_rc_version}-cp38-cp38-manylinux2014_aarch64.whl
-
- wheel-manylinux2014-cp39-arm64:
- ci: travis
- template: python-wheels/travis.linux.arm64.yml
- params:
- python_version: 3.9
- manylinux_version: 2014
- artifacts:
- - pyarrow-{no_rc_version}-cp39-cp39-manylinux2014_aarch64.whl
-
- ############################## Wheel OSX ####################################
-
- wheel-osx-mavericks-cp36m:
+ wheel-osx-{{ macos_codename }}-{{ python_tag }}:
ci: github
template: python-wheels/github.osx.yml
params:
- python_version: 3.6
- macos_deployment_target: 10.9
- arrow_s3: "OFF"
+ python_version: {{ python_version }}
+ macos_deployment_target: {{ macos_version }}
+ arrow_s3: {{ arrow_s3 }}
artifacts:
- - pyarrow-{no_rc_version}-cp36-cp36m-macosx_10_9_x86_64.whl
+ - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-{{ platform_tag }}.whl
- wheel-osx-mavericks-cp37m:
- ci: github
- template: python-wheels/github.osx.yml
- params:
- python_version: 3.7
- macos_deployment_target: 10.9
- arrow_s3: "OFF"
- artifacts:
- - pyarrow-{no_rc_version}-cp37-cp37m-macosx_10_9_x86_64.whl
+{% endfor %}
- wheel-osx-mavericks-cp38:
- ci: github
- template: python-wheels/github.osx.yml
- params:
- python_version: 3.8
- macos_deployment_target: 10.9
- arrow_s3: "OFF"
- artifacts:
- - pyarrow-{no_rc_version}-cp38-cp38-macosx_10_9_x86_64.whl
+{############################## Wheel Windows ################################}
- wheel-osx-mavericks-cp39:
- ci: github
- template: python-wheels/github.osx.yml
- params:
- python_version: 3.9
- macos_deployment_target: 10.9
- arrow_s3: "OFF"
- artifacts:
- - pyarrow-{no_rc_version}-cp39-cp39-macosx_10_9_x86_64.whl
-
- # enable S3 support from macOS 10.13 so we don't need to bundle curl, crypt and ssl
-
- wheel-osx-high-sierra-cp36m:
- ci: github
- template: python-wheels/github.osx.yml
- params:
- python_version: 3.6
- macos_deployment_target: 10.13
- arrow_s3: "ON"
- artifacts:
- - pyarrow-{no_rc_version}-cp36-cp36m-macosx_10_13_x86_64.whl
-
- wheel-osx-high-sierra-cp37m:
- ci: github
- template: python-wheels/github.osx.yml
- params:
- python_version: 3.7
- macos_deployment_target: 10.13
- arrow_s3: "ON"
- artifacts:
- - pyarrow-{no_rc_version}-cp37-cp37m-macosx_10_13_x86_64.whl
-
- wheel-osx-high-sierra-cp38:
- ci: github
- template: python-wheels/github.osx.yml
- params:
- python_version: 3.8
- macos_deployment_target: 10.13
- arrow_s3: "ON"
- artifacts:
- - pyarrow-{no_rc_version}-cp38-cp38-macosx_10_13_x86_64.whl
-
- wheel-osx-high-sierra-cp39:
- ci: github
- template: python-wheels/github.osx.yml
- params:
- python_version: 3.9
- macos_deployment_target: 10.13
- arrow_s3: "ON"
- artifacts:
- - pyarrow-{no_rc_version}-cp39-cp39-macosx_10_13_x86_64.whl
-
- ############################## Wheel Windows ################################
-
- wheel-windows-cp36m:
+ wheel-windows-{{ python_tag }}:
ci: github
template: python-wheels/github.windows.yml
params:
- python_version: 3.6
+ python_version: {{ python_version }}
artifacts:
- - pyarrow-{no_rc_version}-cp36-cp36m-win_amd64.whl
+ - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-win_amd64.whl
- wheel-windows-cp37m:
- ci: github
- template: python-wheels/github.windows.yml
- params:
- python_version: 3.7
- artifacts:
- - pyarrow-{no_rc_version}-cp37-cp37m-win_amd64.whl
+{% endfor %}
- wheel-windows-cp38:
- ci: github
- template: python-wheels/github.windows.yml
- params:
- python_version: 3.8
- artifacts:
- - pyarrow-{no_rc_version}-cp38-cp38-win_amd64.whl
-
- wheel-windows-cp39:
- ci: github
- template: python-wheels/github.windows.yml
- params:
- python_version: 3.9
- artifacts:
- - pyarrow-{no_rc_version}-cp39-cp39-win_amd64.whl
-
- ############################ Python sdist ####################################
+{############################ Python sdist ####################################}
python-sdist:
ci: github
@@ -1259,7 +1082,7 @@
- plasma-store-server-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
- plasma-store-server-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
- centos-8-aarch64:
+ centos-8-arm64:
ci: travis
template: linux-packages/travis.linux.arm64.yml
params:
@@ -1360,198 +1183,49 @@
########################### Release verification ############################
- verify-rc-binaries-binary:
+{% for target in ["binary", "yum", "apt"] %}
+ verify-rc-binaries-{{ target }}:
ci: github
template: verify-rc/github.linux.yml
params:
env:
TEST_DEFAULT: 0
- TEST_BINARY: 1
+ TEST_{{ target|upper }}: 1
artifact: "binaries"
+{% endfor %}
- verify-rc-binaries-apt:
- ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_APT: 1
- artifact: "binaries"
+{% for platform in ["linux", "macos"] %}
- verify-rc-binaries-yum:
+ verify-rc-wheels-{{ platform }}:
ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_YUM: 1
- artifact: "binaries"
-
- verify-rc-wheels-linux:
- ci: github
- template: verify-rc/github.linux.yml
+ template: verify-rc/github.{{ platform }}.yml
params:
env:
TEST_DEFAULT: 0
artifact: "wheels"
- verify-rc-wheels-macos:
- ci: github
- template: verify-rc/github.osx.yml
- params:
- env:
- TEST_DEFAULT: 0
- artifact: "wheels"
+{% for target in ["csharp",
+ "go",
+                  "integration",
+ "java",
+ "js",
+ "python",
+ "ruby",
+ "rust"] %}
- verify-rc-source-macos-java:
+ verify-rc-source-{{ platform }}-{{ target }}:
ci: github
- template: verify-rc/github.osx.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_JAVA: 1
- artifact: "source"
-
- verify-rc-source-macos-csharp:
- ci: github
- template: verify-rc/github.osx.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_CSHARP: 1
- artifact: "source"
-
- verify-rc-source-macos-ruby:
- ci: github
- template: verify-rc/github.osx.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_RUBY: 1
- artifact: "source"
-
- verify-rc-source-macos-python:
- ci: github
- template: verify-rc/github.osx.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_PYTHON: 1
- # https://stackoverflow.com/questions/56083725/macos-build-issues-lstdc-not-found-while-building-python-package
- MACOSX_DEPLOYMENT_TARGET: "10.15"
- artifact: "source"
-
- verify-rc-source-macos-js:
- ci: github
- template: verify-rc/github.osx.yml
+ template: verify-rc/github.{{ platform }}.yml
params:
env:
INSTALL_NODE: 0
TEST_DEFAULT: 0
- TEST_JS: 1
+ TEST_{{ target|upper }}: 1
artifact: "source"
- verify-rc-source-macos-go:
- ci: github
- template: verify-rc/github.osx.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_GO: 1
- artifact: "source"
+{% endfor %}
- verify-rc-source-macos-rust:
- ci: github
- template: verify-rc/github.osx.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_RUST: 1
- artifact: "source"
-
- verify-rc-source-macos-integration:
- ci: github
- template: verify-rc/github.osx.yml
- params:
- env:
- INSTALL_NODE: 0
- TEST_DEFAULT: 0
- TEST_INTEGRATION: 1
- artifact: "source"
-
- verify-rc-source-linux-java:
- ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_JAVA: 1
- artifact: "source"
-
- verify-rc-source-linux-csharp:
- ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_CSHARP: 1
- artifact: "source"
-
- verify-rc-source-linux-ruby:
- ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_RUBY: 1
- artifact: "source"
-
- verify-rc-source-linux-python:
- ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_PYTHON: 1
- artifact: "source"
-
- verify-rc-source-linux-js:
- ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- INSTALL_NODE: 0
- TEST_DEFAULT: 0
- TEST_JS: 1
- artifact: "source"
-
- verify-rc-source-linux-go:
- ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_GO: 1
- artifact: "source"
-
- verify-rc-source-linux-rust:
- ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- TEST_DEFAULT: 0
- TEST_RUST: 1
- artifact: "source"
-
- verify-rc-source-linux-integration:
- ci: github
- template: verify-rc/github.linux.yml
- params:
- env:
- INSTALL_NODE: 0
- TEST_DEFAULT: 0
- TEST_INTEGRATION: 1
- artifact: "source"
+{% endfor %}
verify-rc-source-windows:
ci: github
@@ -1565,19 +1239,20 @@
params:
script: "verify-release-candidate-wheels.bat"
- ############################## Docker tests #################################
+{############################## Docker tests #################################}
- test-conda-cpp:
+{% for image in ["conda-cpp",
+ "conda-cpp-valgrind",
+ "debian-c-glib",
+ "ubuntu-c-glib",
+ "debian-ruby",
+ "ubuntu-ruby"] %}
+ test-{{ image }}:
ci: github
template: docker-tests/github.linux.yml
params:
- run: conda-cpp
-
- test-conda-cpp-valgrind:
- ci: github
- template: docker-tests/github.linux.yml
- params:
- run: conda-cpp-valgrind
+ run: {{ image }}
+{% endfor %}
test-debian-10-cpp:
ci: github
@@ -1627,21 +1302,15 @@
UBUNTU: 20.04
run: ubuntu-cpp
- test-ubuntu-20.04-cpp-14:
+{% for cpp_standard in [14, 17] %}
+ test-ubuntu-20.04-cpp-{{ cpp_standard }}:
ci: github
template: docker-tests/github.linux.yml
params:
env:
UBUNTU: 20.04
- run: "-e CMAKE_ARGS=-DCMAKE_CXX_STANDARD=14 ubuntu-cpp"
-
- test-ubuntu-20.04-cpp-17:
- ci: github
- template: docker-tests/github.linux.yml
- params:
- env:
- UBUNTU: 20.04
- run: "-e CMAKE_ARGS=-DCMAKE_CXX_STANDARD=17 ubuntu-cpp"
+ run: "-e CMAKE_ARGS=-DCMAKE_CXX_STANDARD={{ cpp_standard }} ubuntu-cpp"
+{% endfor %}
test-ubuntu-20.04-cpp-thread-sanitizer:
ci: github
@@ -1652,53 +1321,15 @@
UBUNTU: 20.04
run: ubuntu-cpp-thread-sanitizer
- test-debian-c-glib:
- ci: github
- template: docker-tests/github.linux.yml
- params:
- run: debian-c-glib
-
- test-ubuntu-c-glib:
- ci: github
- template: docker-tests/github.linux.yml
- params:
- run: ubuntu-c-glib
-
- test-debian-ruby:
- ci: github
- template: docker-tests/github.linux.yml
- params:
- run: debian-ruby
-
- test-ubuntu-ruby:
- ci: azure
- template: docker-tests/azure.linux.yml
- params:
- run: ubuntu-ruby
-
- test-conda-python-3.6:
+{% for python_version in ["3.6", "3.7", "3.8", "3.9"] %}
+ test-conda-python-{{ python_version }}:
ci: github
template: docker-tests/github.linux.yml
params:
env:
- PYTHON: 3.6
+ PYTHON: {{ python_version }}
run: conda-python
-
- test-conda-python-3.7:
- ci: github
- template: docker-tests/github.linux.yml
- params:
- env:
- PYTHON: 3.7
- run: conda-python
-
- test-conda-python-3.8:
- ci: github
- template: docker-tests/github.linux.yml
- params:
- env:
- PYTHON: 3.8
- run: conda-python
+{% endfor %}
test-conda-python-3.8-hypothesis:
ci: github
@@ -1740,7 +1371,7 @@
ci: github
template: r/github.linux.cran.yml
params:
- MATRIX: "${{ matrix.r_image }}"
+ MATRIX: {{ "${{ matrix.r_image }}" }}
test-r-version-compatibility:
ci: github
@@ -1750,7 +1381,7 @@
ci: github
template: r/github.linux.versions.yml
params:
- MATRIX: "${{ matrix.r_version }}"
+ MATRIX: {{ "${{ matrix.r_version }}" }}
test-r-install-local:
ci: github
diff --git a/dev/tasks/verify-rc/github.linux.yml b/dev/tasks/verify-rc/github.linux.yml
index 4da78c8..8729426 100644
--- a/dev/tasks/verify-rc/github.linux.yml
+++ b/dev/tasks/verify-rc/github.linux.yml
@@ -15,35 +15,24 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
verify:
name: "Verify release candidate Ubuntu {{ artifact }}"
runs-on: ubuntu-20.04
- {%- if env is defined %}
+ {% if env is defined %}
env:
- {%- for key, value in env.items() %}
+ {% for key, value in env.items() %}
{{ key }}: {{ value }}
- {%- endfor %}
- {%- endif %}
+ {% endfor %}
+ {% endif %}
+
steps:
- - name: Checkout Arrow
- run: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- - name: Fetch Submodules and Tags
- shell: bash
- run: cd arrow && ci/scripts/util_checkout.sh
+ {{ macros.github_checkout_arrow()|indent }}
+
- name: Install System Dependencies
run: |
# TODO: don't require removing newer llvms
diff --git a/dev/tasks/verify-rc/github.osx.yml b/dev/tasks/verify-rc/github.macos.yml
similarity index 68%
rename from dev/tasks/verify-rc/github.osx.yml
rename to dev/tasks/verify-rc/github.macos.yml
index a0f6fc4..ab0c656 100644
--- a/dev/tasks/verify-rc/github.osx.yml
+++ b/dev/tasks/verify-rc/github.macos.yml
@@ -15,35 +15,24 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
verify:
name: "Verify release candidate macOS {{ artifact }}"
runs-on: macos-latest
- {%- if env is defined %}
+ {% if env is defined %}
env:
- {%- for key, value in env.items() %}
+ {% for key, value in env.items() %}
{{ key }}: {{ value }}
- {%- endfor %}
- {%- endif %}
+ {% endfor %}
+ {% endif %}
+
steps:
- - name: Checkout Arrow
- run: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- - name: Fetch Submodules and Tags
- shell: bash
- run: cd arrow && ci/scripts/util_checkout.sh
+ {{ macros.github_checkout_arrow()|indent }}
+
- name: Install System Dependencies
shell: bash
run: |
diff --git a/dev/tasks/verify-rc/github.win.yml b/dev/tasks/verify-rc/github.win.yml
index 7a96f89..5406327 100644
--- a/dev/tasks/verify-rc/github.win.yml
+++ b/dev/tasks/verify-rc/github.win.yml
@@ -15,35 +15,24 @@
# specific language governing permissions and limitations
# under the License.
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
+{% import 'macros.jinja' as macros with context %}
-on:
- push:
- branches:
- - "*-github-*"
+{{ macros.github_header() }}
jobs:
verify:
name: "Verify release candidate Windows source"
runs-on: windows-2016
- {%- if env is defined %}
+ {% if env is defined %}
env:
- {%- for key, value in env.items() %}
+ {% for key, value in env.items() %}
{{ key }}: {{ value }}
- {%- endfor %}
- {%- endif %}
+ {% endfor %}
+ {% endif %}
+
steps:
- - name: Checkout Arrow
- run: |
- git clone --no-checkout {{ arrow.remote }} arrow
- git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
- git -C arrow checkout FETCH_HEAD
- git -C arrow submodule update --init --recursive
- - name: Fetch Submodules and Tags
- shell: bash
- run: cd arrow && ci/scripts/util_checkout.sh
+ {{ macros.github_checkout_arrow()|indent }}
+
- uses: conda-incubator/setup-miniconda@v2
- name: Install System Dependencies
run: |
diff --git a/docs/source/developers/crossbow.rst b/docs/source/developers/crossbow.rst
index d0ad05c..7d5a3ca 100644
--- a/docs/source/developers/crossbow.rst
+++ b/docs/source/developers/crossbow.rst
@@ -125,30 +125,17 @@
Miniconda is preferred, see installation instructions:
https://conda.io/docs/user-guide/install/index.html
-8. Install the python dependencies for the script:
+8. Install the archery toolset containing crossbow itself:
.. code:: bash
- conda install -c conda-forge -y --file arrow/ci/conda_env_crossbow.txt
-
- .. code:: bash
-
- # pygit2 requires libgit2: http://www.pygit2.org/install.html
- pip install \
- jinja2 \
- pygit2 \
- click \
- ruamel.yaml \
- setuptools_scm \
- github3.py \
- toolz \
- jira
+ pip install -e arrow/dev/archery[crossbow]
9. Try running it:
.. code:: bash
- $ python crossbow.py --help
+ $ archery crossbow --help
Usage
-----
@@ -165,8 +152,8 @@
$ git clone https://github.com/kszucs/crossbow
$ cd arrow/dev/tasks
- $ python crossbow.py submit --help # show the available options
- $ python crossbow.py submit conda-win conda-linux conda-osx
+ $ archery crossbow submit --help # show the available options
+ $ archery crossbow submit conda-win conda-linux conda-osx
2. Gets the HEAD commit of the currently checked out branch and
generates the version number based on `setuptools_scm`_. So to build
@@ -175,7 +162,7 @@
.. code:: bash
git checkout ARROW-<ticket number>
- python dev/tasks/crossbow.py submit --dry-run conda-linux conda-osx
+ archery crossbow submit --dry-run conda-linux conda-osx
..
@@ -201,14 +188,14 @@
.. code:: bash
- python crossbow.py status <build id / branch name>
+ archery crossbow status <build id / branch name>
Download the build artifacts
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: bash
- python crossbow.py artifacts <build id / branch name>
+ archery crossbow artifacts <build id / branch name>
Examples
~~~~~~~~
@@ -220,7 +207,7 @@
.. code:: bash
- $ python crossbow.py submit debian-stretch conda-linux-gcc-py37-r40
+ $ archery crossbow submit debian-stretch conda-linux-gcc-py37-r40
Repository: https://github.com/kszucs/arrow@tasks
Commit SHA: 810a718836bb3a8cefc053055600bdcc440e6702
Version: 0.9.1.dev48+g810a7188.d20180414
@@ -232,35 +219,34 @@
.. code:: bash
- $ python crossbow.py submit --dry-run task_name
+ $ archery crossbow submit --dry-run task_name
Run only ``conda`` package builds and a Linux one:
.. code:: bash
- $ python crossbow.py submit --group conda centos-7
+ $ archery crossbow submit --group conda centos-7
Run ``wheel`` builds:
.. code:: bash
- $ python crossbow.py submit --group wheel
+ $ archery crossbow submit --group wheel
There are multiple task groups in the ``tasks.yml`` like docker, integration
and cpp-python for running docker based tests.
-``python crossbow.py submit`` supports multiple options and arguments, for more
+``archery crossbow submit`` supports multiple options and arguments, for more
see its help page:
.. code:: bash
- $ python crossbow.py submit --help
+ $ archery crossbow submit --help
.. _conda-forge packages: conda-recipes
.. _Wheels: python-wheels
.. _Linux packages: linux-packages
-.. _Crossbow.py: crossbow.py
.. _Create the queue repository: https://help.github.com/articles/creating-a-new-repository
.. _TravisCI: https://travis-ci.org/getting_started
.. _Appveyor: https://www.appveyor.com/docs/