blob: 4bbde75b74cf4ef47253f8050e346b80e9de55f8 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from collections import namedtuple
from io import StringIO
import click
import errno
import json
import logging
import os
import pathlib
import sys
from .benchmark.codec import JsonEncoder
from import RunnerComparator, DEFAULT_THRESHOLD
from .benchmark.runner import BenchmarkRunner, CppBenchmarkRunner
from .lang.cpp import CppCMakeDefinition, CppConfiguration
from .utils.lint import linter, python_numpydoc, LintValidationException
from .utils.logger import logger, ctx as log_ctx
from .utils.source import ArrowSources, InvalidArrowSource
from .utils.tmpdir import tmpdir
# Set default logging to INFO in command line.
class ArrowBool(click.types.BoolParamType):
    """Click boolean parameter type that also accepts 'ON' and 'OFF'.

    ArrowBool supports the 'ON' and 'OFF' values on top of the values
    supported by BoolParamType. This is convenient to port script which exports
    CMake options variables.
    """
    name = "boolean"

    def convert(self, value, param, ctx):
        """Convert a raw option value to a bool.

        Strings equal to "on" / "off" (compared case-insensitively) map to
        True / False. Any other value is delegated unchanged to
        ``BoolParamType.convert``.
        """
        if isinstance(value, str):
            lowered = value.lower()
            if lowered == "on":
                return True
            elif lowered == "off":
                return False

        return super().convert(value, param, ctx)


# Shared instance, passed as ``type=BOOL`` to the click options in this module.
BOOL = ArrowBool()
# Top-level `archery` entry point; other commands in this file register on it
# through `@archery.command(...)`. It accepts the global --debug, --pdb and
# -q/--quiet flags.
# NOTE(review): this copy of the block is damaged — the decorators that turn
# the function into a click group, the closing docstring quotes and several
# statements are not present. Restore them from the upstream file before use.
@click.option("--debug", type=BOOL, is_flag=True, default=False,
help="Increase logging with debugging output.")
@click.option("--pdb", type=BOOL, is_flag=True, default=False,
help="Invoke pdb on uncaught exception.")
@click.option("-q", "--quiet", type=BOOL, is_flag=True, default=False,
help="Silence executed commands.")
def archery(ctx, debug, pdb, quiet):
""" Apache Arrow developer utilities.
See sub-commands help with `archery <cmd> --help`.
# Ensure ctx.obj exists
# Propagate --quiet to the logger context imported from .utils.logger.
log_ctx.quiet = quiet
if debug:
ctx.debug = debug
if pdb:
# The local import deliberately shadows the boolean `pdb` parameter.
import pdb
# NOTE(review): the lambda body is missing on the next line (truncated).
sys.excepthook = lambda t, v, e:
def validate_arrow_sources(ctx, param, src):
    """ Ensure a directory contains Arrow cpp sources.

    Click option callback (``ctx``, ``param``, value). Returns whatever
    ``ArrowSources.find(src)`` yields, so the command receives that object
    instead of the raw option value.

    Raises
    ------
    click.BadParameter
        If ``ArrowSources.find`` raises ``InvalidArrowSource``; the original
        error is kept as the cause.
    """
    try:
        return ArrowSources.find(src)
    except InvalidArrowSource as e:
        raise click.BadParameter(str(e)) from e
# Click parameter types shared by the option declarations below.
# NOTE(review): the three click.Choice(...) calls in this group are cut off
# after their value list in this copy; any trailing keyword arguments and the
# closing parenthesis are not visible and must be restored from upstream.
# Path type for the build directory: directories only, resolved to a full path.
build_dir_type = click.Path(dir_okay=True, file_okay=False, resolve_path=True)
# Supported build types
build_type = click.Choice(["debug", "relwithdebinfo", "release"],
# Supported warn levels
warn_level_type = click.Choice(["everything", "checkin", "production"],
simd_level = click.Choice(["NONE", "SSE4_2", "AVX2", "AVX512"],
# Decorate `cmd` with the C/C++ toolchain options (--cc, --cxx, --cxx-flags, ...)
# by handing the option list to `_apply_options`.
# NOTE(review): the last option in the list is truncated in this copy (only
# part of its help text survives) and the closing bracket of the list is
# missing — restore from upstream.
def cpp_toolchain_options(cmd):
options = [
click.option("--cc", metavar="<compiler>", help="C compiler."),
click.option("--cxx", metavar="<compiler>", help="C++ compiler."),
click.option("--cxx-flags", help="C++ compiler flags."),
help=("Value to pass for ARROW_PACKAGE_PREFIX and "
return _apply_options(cmd, options)
def _apply_options(cmd, options):
    """Apply every decorator in ``options`` to ``cmd`` and return the result.

    Decorators are applied in list order, each one receiving the output of
    the previous one. An empty ``options`` returns ``cmd`` unchanged.
    """
    for option in options:
        cmd = option(cmd)

    return cmd
# `archery build`: initialize an Arrow C++ build directory. Every option that
# is not named in the signature is collected in **kwargs and forwarded to
# CppConfiguration; `targets` lists generator targets to run afterwards.
# NOTE(review): this copy of the block is damaged — some decorator lines, the
# closing docstring quotes, the call that creates the build and the body of the
# final `for` loop are missing or garbled. Restore from upstream before use.
@archery.command(short_help="Initialize an Arrow C++ build")
@click.option("--src", metavar="<arrow_src>", default=None,
help="Specify Arrow source directory")
# toolchain
@click.option("--build-type", default=None, type=build_type,
help="CMake's CMAKE_BUILD_TYPE")
@click.option("--warn-level", default="production", type=warn_level_type,
help="Controls compiler warnings -W(no-)error.")
@click.option("--use-gold-linker", default=True, type=BOOL,
help="Toggles ARROW_USE_LD_GOLD option.")
@click.option("--simd-level", default="SSE4_2", type=simd_level,
help="Toggles ARROW_SIMD_LEVEL option.")
# Tests and benchmarks
@click.option("--with-tests", default=True, type=BOOL,
help="Build with tests.")
@click.option("--with-benchmarks", default=None, type=BOOL,
help="Build with benchmarks.")
@click.option("--with-examples", default=None, type=BOOL,
help="Build with examples.")
@click.option("--with-integration", default=None, type=BOOL,
help="Build with integration test executables.")
# Static checks
@click.option("--use-asan", default=None, type=BOOL,
help="Toggle ARROW_USE_ASAN sanitizer.")
@click.option("--use-tsan", default=None, type=BOOL,
help="Toggle ARROW_USE_TSAN sanitizer.")
@click.option("--use-ubsan", default=None, type=BOOL,
help="Toggle ARROW_USE_UBSAN sanitizer.")
@click.option("--with-fuzzing", default=None, type=BOOL,
help="Toggle ARROW_FUZZING.")
# Components
@click.option("--with-compute", default=None, type=BOOL,
help="Build the Arrow compute module.")
@click.option("--with-csv", default=None, type=BOOL,
help="Build the Arrow CSV parser module.")
@click.option("--with-cuda", default=None, type=BOOL,
help="Build the Arrow CUDA extensions.")
@click.option("--with-dataset", default=None, type=BOOL,
help="Build the Arrow dataset module.")
@click.option("--with-filesystem", default=None, type=BOOL,
help="Build the Arrow filesystem layer.")
@click.option("--with-flight", default=None, type=BOOL,
help="Build with Flight rpc support.")
@click.option("--with-gandiva", default=None, type=BOOL,
help="Build with Gandiva expression compiler support.")
@click.option("--with-hdfs", default=None, type=BOOL,
help="Build the Arrow HDFS bridge.")
@click.option("--with-hiveserver2", default=None, type=BOOL,
help="Build the HiveServer2 client and arrow adapater.")
@click.option("--with-ipc", default=None, type=BOOL,
help="Build the Arrow IPC extensions.")
@click.option("--with-json", default=None, type=BOOL,
help="Build the Arrow JSON parser module.")
@click.option("--with-jni", default=None, type=BOOL,
help="Build the Arrow JNI lib.")
@click.option("--with-mimalloc", default=None, type=BOOL,
help="Build the Arrow mimalloc based allocator.")
@click.option("--with-parquet", default=None, type=BOOL,
help="Build with Parquet file support.")
@click.option("--with-plasma", default=None, type=BOOL,
help="Build with Plasma object store support.")
@click.option("--with-python", default=None, type=BOOL,
help="Build the Arrow CPython extesions.")
@click.option("--with-r", default=None, type=BOOL,
help="Build the Arrow R extensions. This is not a CMake option, "
"it will toggle required options")
@click.option("--with-s3", default=None, type=BOOL,
help="Build Arrow with S3 support.")
# Compressions
@click.option("--with-brotli", default=None, type=BOOL,
help="Build Arrow with brotli compression.")
@click.option("--with-bz2", default=None, type=BOOL,
help="Build Arrow with bz2 compression.")
@click.option("--with-lz4", default=None, type=BOOL,
help="Build Arrow with lz4 compression.")
@click.option("--with-snappy", default=None, type=BOOL,
help="Build Arrow with snappy compression.")
@click.option("--with-zlib", default=None, type=BOOL,
help="Build Arrow with zlib compression.")
@click.option("--with-zstd", default=None, type=BOOL,
help="Build Arrow with zstd compression.")
# CMake extra feature
@click.option("--cmake-extras", type=str, multiple=True,
help="Extra flags/options to pass to cmake invocation. "
"Can be stacked")
@click.option("--install-prefix", type=str,
help="Destination directory where files are installed. Expand to"
"variable exists.")
# misc
@click.option("-f", "--force", type=BOOL, is_flag=True, default=False,
help="Delete existing build directory if found.")
@click.option("--targets", type=str, multiple=True,
help="Generator targets to run. Can be stacked.")
@click.argument("build_dir", type=build_dir_type)
def build(ctx, src, build_dir, force, targets, **kwargs):
""" Initialize a C++ build directory.
The build command creates a directory initialized with Arrow's cpp source
cmake and configuration. It can also optionally invoke the generator to
test the build (and used in scripts).
Note that archery will carry the caller environment. It will also not touch
an existing directory, one must use the `--force` option to remove the
existing directory.
# Initialize build with clang8 and avx2 support in directory `clang8-build`
archery build --cc=clang-8 --cxx=clang++-8 --cxx-flags=-mavx2 clang8-build
# Builds and run test
archery build --targets=all --targets=test build
# Arrow's cpp cmake configuration
conf = CppConfiguration(**kwargs)
# This is a closure around cmake invocation, e.g. calling ``
# yields a directory ready to be run with the generator
# `src.cpp` is read here, so `src` must already be a sources object and not
# the raw option string.
cmake_def = CppCMakeDefinition(src.cpp, conf)
# Create build directory
# NOTE(review): the next statement is garbled (the callee and its first
# argument are missing) and the loop after it has lost its body.
build =, force=force)
for target in targets:
# Descriptor of one lint check: `option_name` is the suffix of the generated
# --<name>/--no-<name> CLI switch, `help` its help text (both are consumed by
# decorate_lint_command).
LintCheck = namedtuple('LintCheck', ('option_name', 'help'))
# Registry of the available lint checks.
# NOTE(review): two entries have lost their first line (only the help string
# remains) and the closing bracket of the list is missing in this copy.
lint_checks = [
LintCheck('clang-format', "Format C++ files with clang-format."),
LintCheck('clang-tidy', "Lint C++ files with clang-tidy."),
LintCheck('cpplint', "Lint C++ files with cpplint."),
LintCheck('iwyu', "Lint changed C++ files with Include-What-You-Use."),
"Format and lint Python files with autopep8 and flake8."),
LintCheck('numpydoc', "Lint Python files with numpydoc."),
LintCheck('cmake-format', "Format CMake files with"),
"Check all sources files for license texts via Apache RAT."),
LintCheck('r', "Lint R files."),
LintCheck('rust', "Lint Rust files."),
LintCheck('docker', "Lint Dockerfiles with hadolint."),
# Adds one --<check>/--no-<check> click switch per entry of `lint_checks` to
# `cmd` and returns the decorated command.
# NOTE(review): the docstring quotes are missing and the click.option(...) call
# is cut off after its first argument in this copy.
def decorate_lint_command(cmd):
Decorate the lint() command function to add individual per-check options.
for check in lint_checks:
option = click.option("--{0}/--no-{0}".format(check.option_name),
cmd = option(cmd)
return cmd
# `archery lint`: run the selected lint checks on an Arrow source tree.
# `checks` holds one entry per check switch plus the `all` flag.
# NOTE(review): the `try:` line and the body of the `except` handler are
# missing in this copy, as may be further decorators above the function.
@archery.command(short_help="Check Arrow source tree for errors")
@click.option("--src", metavar="<arrow_src>", default=".",
help="Specify Arrow source directory")
@click.option("--fix", is_flag=True, type=BOOL, default=False,
help="Toggle fixing the lint errors if the linter supports it.")
@click.option("--iwyu_all", is_flag=True, type=BOOL, default=False,
help="Run IWYU on all C++ files if enabled")
@click.option("-a", "--all", is_flag=True, default=False,
help="Enable all checks.")
def lint(ctx, src, fix, iwyu_all, **checks):
src = ArrowSources(src)
# `all` is removed from the mapping so that only real checks remain in it.
if checks.pop('all'):
# "--all" is given => enable all non-selected checks
for k, v in checks.items():
# None means the user gave neither --<check> nor --no-<check>.
if v is None:
checks[k] = True
if not any(checks.values()):
raise click.UsageError(
"Need to enable at least one lint check (try --help)")
linter(src, fix, iwyu_all=iwyu_all, **checks)
except LintValidationException:
# `archery numpydoc`: validate Python docstrings through `python_numpydoc`,
# optionally restricted to the given symbols and allowed/disallowed rules.
# NOTE(review): the docstring quotes, the end of the python_numpydoc(...) call,
# the loop body and the `try:` / `except` body are missing in this copy.
@archery.command(short_help="Lint python docstring with NumpyDoc")
@click.argument('symbols', nargs=-1)
@click.option("--src", metavar="<arrow_src>", default=None,
help="Specify Arrow source directory")
@click.option("--allow-rule", "-a", multiple=True,
help="Allow only these rules")
@click.option("--disallow-rule", "-d", multiple=True,
help="Disallow these rules")
def numpydoc(src, symbols, allow_rule, disallow_rule):
Pass list of modules or symbols as arguments to restrict the validation.
By default all modules of pyarrow are tried to be validated.
archery numpydoc pyarrow.dataset
archery numpydoc pyarrow.csv pyarrow.json pyarrow.parquet
archery numpydoc pyarrow.array
# When no --disallow-rule is given, fall back to this default set of rules.
disallow_rule = disallow_rule or {'GL01', 'SA01', 'EX01', 'ES01'}
results = python_numpydoc(symbols, allow_rules=allow_rule,
for result in results:
except LintValidationException:
# NOTE(review): the benchmark sub-commands register through
# `@benchmark.command(...)`, so this function needs its click group decorators;
# they are not present in this copy of the file and must be restored.
def benchmark(ctx):
    """ Arrow benchmarking.

    Use the diff sub-command to benchmark revisions, and/or build directories.
    """
def benchmark_common_options(cmd):
    """Decorate ``cmd`` with the common benchmark options.

    Adds ``--src`` (validated by ``validate_arrow_sources``), ``--preserve``,
    ``--output`` and ``--cmake-extras``, on top of the options added by
    ``cpp_toolchain_options``. Returns the decorated command.
    """
    options = [
        click.option("--src", metavar="<arrow_src>", show_default=True,
                     default=None, callback=validate_arrow_sources,
                     help="Specify Arrow source directory"),
        click.option("--preserve", type=BOOL, default=False, show_default=True,
                     help="Preserve workspace for investigation."),
        click.option("--output", metavar="<output>",
                     type=click.File("w", encoding="utf8"), default="-",
                     help="Capture output result into file."),
        click.option("--cmake-extras", type=str, multiple=True,
                     help="Extra flags/options to pass to cmake invocation. "
                     "Can be stacked"),
    ]

    # Toolchain options are applied first, then the benchmark-specific ones.
    cmd = cpp_toolchain_options(cmd)
    return _apply_options(cmd, options)
def benchmark_filter_options(cmd):
    """Decorate ``cmd`` with the ``--suite-filter`` and ``--benchmark-filter``
    regex options and return the decorated command.
    """
    options = [
        click.option("--suite-filter", metavar="<regex>", show_default=True,
                     type=str, default=None,
                     help="Regex filtering benchmark suites."),
        click.option("--benchmark-filter", metavar="<regex>",
                     show_default=True, type=str, default=None,
                     help="Regex filtering benchmarks.")
    ]
    return _apply_options(cmd, options)
# `archery benchmark list`: build a runner for `rev_or_path` and print each
# entry of `runner_base.list_benchmarks` to `output`.
# NOTE(review): the end of the signature (the body uses `kwargs`), the closing
# docstring quotes and possibly further decorators are missing in this copy.
@benchmark.command(name="list", short_help="List benchmark suite")
@click.argument("rev_or_path", metavar="[<rev_or_path>]",
default="WORKSPACE", required=False)
def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
""" List benchmark suite.
# All work happens inside a temporary directory; `preserve` is handed to
# tmpdir().
with tmpdir(preserve=preserve) as root:
logger.debug("Running benchmark {}".format(rev_or_path))
conf = CppBenchmarkRunner.default_configuration(
cmake_extras=cmake_extras, **kwargs)
runner_base = BenchmarkRunner.from_rev_or_path(
src, root, rev_or_path, conf)
for b in runner_base.list_benchmarks:
click.echo(b, file=output)
# `archery benchmark run`: build a runner for `rev_or_path` with the given
# suite/benchmark filters and dump it as JSON (via JsonEncoder) to `output`.
# NOTE(review): the closing docstring quotes are missing in this copy, and
# `repetitions` is accepted but never used in the visible lines — presumably a
# line forwarding it to the runner was lost; confirm against upstream.
@benchmark.command(name="run", short_help="Run benchmark suite")
@click.argument("rev_or_path", metavar="[<rev_or_path>]",
default="WORKSPACE", required=False)
@click.option("--repetitions", type=int, default=1, show_default=True,
help=("Number of repetitions of each benchmark. Increasing "
"may improve result precision."))
def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
suite_filter, benchmark_filter, repetitions, **kwargs):
""" Run benchmark suite.
This command will run the benchmark suite for a single build. This is
used to capture (and/or publish) the results.
The caller can optionally specify a target which is either a git revision
(commit, tag, special values like HEAD) or a cmake build directory.
When a commit is referenced, a local clone of the arrow sources (specified
via --src) is performed and the proper branch is created. This is done in
a temporary directory which can be left intact with the `--preserve` flag.
The special token "WORKSPACE" is reserved to specify the current git
workspace. This imply that no clone will be performed.
# Run the benchmarks on current git workspace
archery benchmark run
# Run the benchmarks on current previous commit
archery benchmark run HEAD~1
# Run the benchmarks on current previous commit
archery benchmark run --output=run.json
with tmpdir(preserve=preserve) as root:
logger.debug("Running benchmark {}".format(rev_or_path))
conf = CppBenchmarkRunner.default_configuration(
cmake_extras=cmake_extras, **kwargs)
runner_base = BenchmarkRunner.from_rev_or_path(
src, root, rev_or_path, conf,
suite_filter=suite_filter, benchmark_filter=benchmark_filter)
json.dump(runner_base, output, cls=JsonEncoder)
# `archery benchmark diff`: build one runner for the contender and one for the
# baseline, compare them with RunnerComparator using `threshold`, then format
# the comparisons through _get_comparisons_as_json and
# _format_comparisons_with_pandas.
# NOTE(review): this copy of the block is damaged — the last decorator, both
# from_rev_or_path(...) calls and the final formatting call are cut off, and
# the closing docstring quotes are missing. Restore from upstream before use.
@benchmark.command(name="diff", short_help="Compare benchmark suites")
@click.option("--threshold", type=float, default=DEFAULT_THRESHOLD,
help="Regression failure threshold in percentage.")
@click.option("--repetitions", type=int, default=1, show_default=True,
help=("Number of repetitions of each benchmark. Increasing "
"may improve result precision."))
@click.option("--no-counters", type=BOOL, default=False, is_flag=True,
help="Hide counters field in diff report.")
@click.argument("contender", metavar="[<contender>",
default=ArrowSources.WORKSPACE, required=False)
@click.argument("baseline", metavar="[<baseline>]]", default="origin/master",
def benchmark_diff(ctx, src, preserve, output, cmake_extras,
suite_filter, benchmark_filter, repetitions, no_counters,
threshold, contender, baseline, **kwargs):
"""Compare (diff) benchmark runs.
This command acts like git-diff but for benchmark results.
The caller can optionally specify both the contender and the baseline. If
unspecified, the contender will default to the current workspace (like git)
and the baseline will default to master.
Each target (contender or baseline) can either be a git revision
(commit, tag, special values like HEAD) or a cmake build directory. This
allow comparing git commits, and/or different compilers and/or compiler
When a commit is referenced, a local clone of the arrow sources (specified
via --src) is performed and the proper branch is created. This is done in
a temporary directory which can be left intact with the `--preserve` flag.
The special token "WORKSPACE" is reserved to specify the current git
workspace. This imply that no clone will be performed.
# Compare workspace (contender) with master (baseline)
archery benchmark diff
# Compare master (contender) with latest version (baseline)
export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1)
archery benchmark diff master "$LAST"
# Compare g++7 (contender) with clang++-8 (baseline) builds
archery build --with-benchmarks=true \\
--cxx-flags=-ftree-vectorize \\
--cc=gcc-7 --cxx=g++-7 gcc7-build
archery build --with-benchmarks=true \\
--cxx-flags=-flax-vector-conversions \\
--cc=clang-8 --cxx=clang++-8 clang8-build
archery benchmark diff gcc7-build clang8-build
# Compare default targets but scoped to the suites matching
# `^arrow-compute-aggregate` and benchmarks matching `(Sum|Mean)Kernel`.
archery benchmark diff --suite-filter="^arrow-compute-aggregate" \\
# Capture result in file `result.json`
archery benchmark diff --output=result.json
# Equivalently with no stdout clutter.
archery --quiet benchmark diff > result.json
# Comparing with a cached results from `archery benchmark run`
archery benchmark run --output=run.json HEAD~1
# This should not recompute the benchmark from run.json
archery --quiet benchmark diff WORKSPACE run.json > result.json
with tmpdir(preserve=preserve) as root:
logger.debug("Comparing {} (contender) with {} (baseline)"
.format(contender, baseline))
conf = CppBenchmarkRunner.default_configuration(
cmake_extras=cmake_extras, **kwargs)
runner_cont = BenchmarkRunner.from_rev_or_path(
src, root, contender, conf,
runner_base = BenchmarkRunner.from_rev_or_path(
src, root, baseline, conf,
runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
# TODO(kszucs): test that the output is properly formatted jsonlines
comparisons_json = _get_comparisons_as_json(runner_comp.comparisons)
formatted = _format_comparisons_with_pandas(comparisons_json,
def _get_comparisons_as_json(comparisons):
    """Serialize ``comparisons`` as JSON Lines and return the text.

    Each item of ``comparisons`` is encoded with ``JsonEncoder`` and written
    on its own line. An empty iterable yields an empty string.
    """
    buf = StringIO()
    for comparator in comparisons:
        json.dump(comparator, buf, cls=JsonEncoder)
        # Terminate every record with a newline: the output is consumed with
        # ``pd.read_json(..., lines=True)``, which expects one JSON document
        # per line. Back-to-back objects without a separator cannot be parsed.
        buf.write("\n")

    return buf.getvalue()
def _format_comparisons_with_pandas(comparisons_json, no_counters):
    """Render JSON Lines comparison records as a two-section text report.

    Parameters
    ----------
    comparisons_json : str
        One JSON object per line. The fields read here are ``benchmark``,
        ``baseline``, ``contender``, ``change`` (a number followed by one
        trailing character, which is stripped), ``regression`` and, unless
        ``no_counters`` is set, ``counters``.
    no_counters : bool
        When true the ``counters`` column is left out of the report.

    Returns
    -------
    str
        A "Non-regressions" section and a "Regressions" section joined by a
        blank line; a section without rows is rendered as an empty string.
    """
    import pandas as pd

    if not comparisons_json.strip():
        # No records at all: same result as two empty sections, without
        # asking pandas to parse (and index into) an empty frame.
        return '\n\n'

    df = pd.read_json(StringIO(comparisons_json), lines=True)

    # parse change % so we can sort by it
    df['change %'] = df.pop('change').str[:-1].map(float)

    # Rows are sorted by descending change below, and the last
    # ``regression.sum()`` rows are reported as the regressions.
    first_regression = len(df) - df['regression'].sum()

    fields = ['benchmark', 'baseline', 'contender', 'change %']
    if not no_counters:
        fields += ['counters']

    df = df[fields].sort_values(by='change %', ascending=False)

    def labelled(title, section):
        # Render one section: a dashed bar as wide as the table header,
        # the title with the row count, the bar again, then the table.
        if len(section) == 0:
            return ''
        title += ': ({})'.format(len(section))
        df_str = section.to_string(index=False)
        bar = '-' * df_str.index('\n')
        return '\n'.join([bar, title, bar, df_str])

    return '\n\n'.join([labelled('Non-regressions', df[:first_regression]),
                        labelled('Regressions', df[first_regression:])])
# ----------------------------------------------------------------------
# Integration testing
def _set_default(opt, default):
    """Return ``default`` when ``opt`` is None, otherwise ``opt`` itself.

    Only ``None`` triggers the fallback; falsy values such as ``0``, ``False``
    or ``""`` are returned unchanged.
    """
    return default if opt is None else opt
# `archery integration`: run the protocol / Flight integration tests for the
# enabled languages. All options except `with_all` and `random_seed` arrive in
# the `args` mapping.
# NOTE(review): this copy of the block is damaged — the statement seeding the
# PRNG, the `try:` body that uses `gen_path`, the re-raise inside the `except`
# and the calls into the integration runner are not present. Restore from
# upstream before use.
@archery.command(short_help="Execute protocol and Flight integration tests")
@click.option('--with-all', is_flag=True, default=False,
help=('Include all known languages by default '
'in integration tests'))
@click.option('--random-seed', type=int, default=12345,
help="Seed for PRNG when generating test data")
@click.option('--with-cpp', type=bool, default=False,
help='Include C++ in integration tests')
@click.option('--with-java', type=bool, default=False,
help='Include Java in integration tests')
@click.option('--with-js', type=bool, default=False,
help='Include JavaScript in integration tests')
@click.option('--with-go', type=bool, default=False,
help='Include Go in integration tests')
@click.option('--with-rust', type=bool, default=False,
help='Include Rust in integration tests')
@click.option('--write_generated_json', default=False,
help='Generate test JSON to indicated path')
@click.option('--run-flight', is_flag=True, default=False,
help='Run Flight integration tests')
@click.option('--debug', is_flag=True, default=False,
help='Run executables in debug mode as relevant')
@click.option('--serial', is_flag=True, default=False,
help='Run tests serially, rather than in parallel')
@click.option('--tempdir', default=None,
help=('Directory to use for writing '
'integration test temporary files'))
@click.option('stop_on_error', '-x', '--stop-on-error',
is_flag=True, default=False,
help='Stop on first error')
@click.option('--gold-dirs', multiple=True,
help="gold integration test file paths")
@click.option('-k', '--match',
help=("Substring for test names to include in run, "
"e.g. -k primitive"))
def integration(with_all=False, random_seed=12345, **args):
# Imported lazily, inside the command, rather than at module import time.
from .integration.runner import write_js_test_json, run_all_tests
import numpy as np
# FIXME(bkietz) Include help strings for individual testers.
# For example, CPPTester's ARROW_CPP_EXE_PATH environment variable.
# Make runs involving data generation deterministic
gen_path = args['write_generated_json']
languages = ['cpp', 'java', 'js', 'go', 'rust']
enabled_languages = 0
for lang in languages:
param = 'with_{}'.format(lang)
# --with-all overrides every per-language switch.
if with_all:
args[param] = with_all
if args[param]:
enabled_languages += 1
if gen_path:
except OSError as e:
# An already-existing path (EEXIST) is tolerated here.
if e.errno != errno.EEXIST:
if enabled_languages == 0:
raise Exception("Must enable at least 1 language to test")
# Command handler that feeds an event to the comment bot: it builds a
# CommentBot named 'github-actions' with the arrow token and passes the event
# name and payload to `bot.handle`.
# NOTE(review): the command decorator above the options and the argument of
# `json.loads(` are missing in this copy; `crossbow_token` is accepted but not
# used in the visible lines.
@click.option('--event-name', '-n', required=True)
@click.option('--event-payload', '-p', type=click.File('r', encoding='utf8'),
default='-', required=True)
@click.option('--arrow-token', envvar='ARROW_GITHUB_TOKEN',
help='OAuth token for responding comment in the arrow repo')
@click.option('--crossbow-token', '-ct', envvar='CROSSBOW_GITHUB_TOKEN',
help='OAuth token for pushing to the crossow repository')
def trigger_bot(event_name, event_payload, arrow_token, crossbow_token):
from .bot import CommentBot, actions
event_payload = json.loads(
bot = CommentBot(name='github-actions', handler=actions, token=arrow_token)
bot.handle(event_name, event_payload)
def _mock_compose_calls(compose):
    """Replace the executors of ``compose`` with stubs that only print.

    After this call ``compose._execute_docker`` and
    ``compose._execute_compose`` no longer run anything: each invocation
    echoes the command line it would have executed, prefixed by the
    ``KEY=VALUE`` pairs from ``compose.config.params``, and returns a
    successful ``CompletedProcess``.
    """
    from types import MethodType
    from subprocess import CompletedProcess

    def _mock(compose, executable):
        def _execute(self, *args, **kwargs):
            params = ['{}={}'.format(k, v)
                      for k, v in self.config.params.items()]
            command = ' '.join(params + [executable] + list(args))
            # Show the command instead of running it; without this the
            # string built above would simply be discarded.
            click.echo(command)
            return CompletedProcess([], 0)
        # Bind as a method so the stub receives ``compose`` as ``self``.
        return MethodType(_execute, compose)

    compose._execute_docker = _mock(compose, executable='docker')
    compose._execute_compose = _mock(compose, executable='docker-compose')
# Group callback for the docker commands: locates docker-compose.yml under the
# Arrow source directory, builds a DockerCompose object from it and stores it
# in `obj['compose']` for the sub-commands.
# NOTE(review): this copy of the block is damaged — the group decorator, the
# end of the --dry-run help string, the closing parenthesis of the
# ClickException call and the body of `if dry_run:` are missing.
@click.option("--src", metavar="<arrow_src>", default=None,
help="Specify Arrow source directory.")
@click.option('--dry-run/--execute', default=False,
help="Display the docker-compose commands instead of executing "
def docker_compose(obj, src, dry_run):
"""Interact with docker-compose based builds."""
from .docker import DockerCompose
# `src.path` is read here, so `src` must already be a sources object and not
# the raw option string.
config_path = src.path / 'docker-compose.yml'
if not config_path.exists():
raise click.ClickException(
"Docker compose configuration cannot be found in directory {}, "
"try to pass the arrow source directory explicitly.".format(src)
# take the docker-compose parameters like PYTHON, PANDAS, UBUNTU from the
# environment variables to keep the usage similar to docker-compose
compose = DockerCompose(config_path, params=os.environ)
if dry_run:
obj['compose'] = compose
# Docker build sub-command: optionally pulls `image` and its ancestors, then
# builds it through the DockerCompose object stored in `obj['compose']`.
# UndefinedImage and RuntimeError are reported as click.ClickException.
# NOTE(review): this copy of the block is damaged — the command decorators,
# the docstring quotes, the `try:` line and parts of the pull/build calls are
# missing or fused together. Restore from upstream before use.
@click.option('--force-pull/--no-pull', default=True,
help="Whether to force pull the image and its ancestor images")
@click.option('--using-docker-cli', default=False, is_flag=True,
help="Use docker CLI directly for building instead of calling "
"docker-compose. This may help to reuse cached layers.")
@click.option('--using-docker-buildx', default=False, is_flag=True,
help="Use buildx with docker CLI directly for building instead "
"of calling docker-compose or the plain docker build "
"command. This option makes the build cache reusable "
"across hosts.")
@click.option('--use-cache/--no-cache', default=True,
help="Whether to use cache when building the image and its "
"ancestor images")
@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
help="Whether to use cache when building only the (leaf) image "
"passed as the argument. To disable caching for both the "
"image and its ancestors use --no-cache option.")
def docker_compose_build(obj, image, *, force_pull, using_docker_cli,
using_docker_buildx, use_cache, use_leaf_cache):
Execute docker-compose builds.
from .docker import UndefinedImage
compose = obj['compose']
# Requesting buildx also switches on the plain docker CLI mode.
using_docker_cli |= using_docker_buildx
if force_pull:
compose.pull(image, pull_leaf=use_leaf_cache,
using_docker=using_docker_cli), use_cache=use_cache,
except UndefinedImage as e:
raise click.ClickException(
"There is no service/image defined in docker-compose.yml with "
"name: {}".format(str(e))
except RuntimeError as e:
raise click.ClickException(str(e))
# Docker run sub-command: optionally pulls and/or builds `image`, then runs it
# (or stops after building when --build-only is set) through the DockerCompose
# object stored in `obj['compose']`. UndefinedImage and RuntimeError are
# reported as click.ClickException.
# NOTE(review): this copy of the block is damaged — the command decorators,
# the closing docstring quotes, the `try:` line, most of the pull/build/run
# calls and the body of `if build_only:` are missing or fused together.
# Restore from upstream before use.
@click.argument('command', required=False, default=None)
@click.option('--env', '-e', multiple=True,
help="Set environment variable within the container")
@click.option('--user', '-u', default=None,
help="Username or UID to run the container with")
@click.option('--force-pull/--no-pull', default=True,
help="Whether to force pull the image and its ancestor images")
@click.option('--force-build/--no-build', default=True,
help="Whether to force build the image and its ancestor images")
@click.option('--build-only', default=False, is_flag=True,
help="Pull and/or build the image, but do not run it")
@click.option('--using-docker-cli', default=False, is_flag=True,
help="Use docker CLI directly for building instead of calling "
"docker-compose. This may help to reuse cached layers.")
@click.option('--using-docker-buildx', default=False, is_flag=True,
help="Use buildx with docker CLI directly for building instead "
"of calling docker-compose or the plain docker build "
"command. This option makes the build cache reusable "
"across hosts.")
@click.option('--use-cache/--no-cache', default=True,
help="Whether to use cache when building the image and its "
"ancestor images")
@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
help="Whether to use cache when building only the (leaf) image "
"passed as the argument. To disable caching for both the "
"image and its ancestors use --no-cache option.")
@click.option('--volume', '-v', multiple=True,
help="Set volume within the container")
def docker_compose_run(obj, image, command, *, env, user, force_pull,
force_build, build_only, using_docker_cli,
using_docker_buildx, use_cache,
use_leaf_cache, volume):
"""Execute docker-compose builds.
To see the available builds run `archery docker images`.
# execute a single build
archery docker run conda-python
# execute the builds but disable the image pulling
archery docker run --no-cache conda-python
# pass a docker-compose parameter, like the python version
PYTHON=3.8 archery docker run conda-python
# disable the cache only for the leaf image
PANDAS=master archery docker run --no-leaf-cache conda-python-pandas
# entirely skip building the image
archery docker run --no-pull --no-build conda-python
# pass runtime parameters via docker environment variables
archery docker run -e CMAKE_BUILD_TYPE=release ubuntu-cpp
# set a volume
archery docker run -v $PWD/build:/build ubuntu-cpp
# starting an interactive bash session for debugging
archery docker run ubuntu-cpp bash
from .docker import UndefinedImage
compose = obj['compose']
# Requesting buildx also switches on the plain docker CLI mode.
using_docker_cli |= using_docker_buildx
# Each -e entry is split on its first '=' into a (name, value) pair.
env = dict(kv.split('=', 1) for kv in env)
if force_pull:
compose.pull(image, pull_leaf=use_leaf_cache,
if force_build:, use_cache=use_cache,
if build_only:
except UndefinedImage as e:
raise click.ClickException(
"There is no service/image defined in docker-compose.yml with "
"name: {}".format(str(e))
except RuntimeError as e:
raise click.ClickException(str(e))
# Docker push sub-command: pushes `image` through the DockerCompose object
# stored in `obj['compose']`, using the given repository credentials.
# NOTE(review): the command decorators and the end of the compose.push(...)
# call are missing in this copy.
@click.option('--user', '-u', required=False, envvar='ARCHERY_DOCKER_USER',
help='Docker repository username')
@click.option('--password', '-p', required=False,
help='Docker repository password')
@click.option('--using-docker-cli', default=False, is_flag=True,
help="Use docker CLI directly for building instead of calling "
"docker-compose. This may help to reuse cached layers.")
def docker_compose_push(obj, image, user, password, using_docker_cli):
"""Push the generated docker-compose image."""
compose = obj['compose']
compose.push(image, user=user, password=password,
# NOTE(review): the click decorators that register this function as a
# sub-command are not present in this copy of the file and must be restored.
def docker_compose_images(obj):
    """List the available docker-compose images."""
    compose = obj['compose']
    click.echo('Available images:')
    # One bullet line per image name reported by the compose object.
    for image in compose.images():
        click.echo(' - {}'.format(image))
# NOTE(review): the click group decorator for this function is not present in
# this copy of the file. `src` is dereferenced through `.path` below although
# the option defaults to None, so the option presumably also needs a callback
# that turns it into a sources object — confirm against upstream.
@click.option("--src", metavar="<arrow_src>", default=None,
              help="Specify Arrow source directory.")
@click.option("--jira-cache", type=click.Path(), default=None,
              help="File path to cache queried JIRA issues per version.")
def release(obj, src, jira_cache):
    """Release related commands."""
    from .release import Jira, CachedJira

    jira = Jira()
    if jira_cache is not None:
        # Wrap the client so that queried issues are cached at the given path.
        jira = CachedJira(jira_cache, jira=jira)

    # Shared state for the release sub-commands, which read both keys.
    obj['jira'] = jira
    obj['repo'] = src.path
# Release curation sub-command: builds a Release for `version` from the JIRA
# client and repository stored in `obj`, then asks it for its curation.
# NOTE(review): the command decorators are missing in this copy, and
# `curation` is computed but never used in the visible lines — presumably the
# statement that prints it was lost; confirm against upstream.
def release_curate(obj, version):
"""Release curation."""
from .release import Release
release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
curation = release.curate()
# NOTE(review): the click decorator that registers this function (presumably
# as a group for the changelog sub-commands) is not present in this copy of
# the file — confirm against upstream.
def release_changelog():
    """Release changelog."""
# Changelog "add" sub-command: renders the changelog of `version` as markdown
# and places it in front of the current content of the changelog file.
# Raises ValueError when the release is already marked as released.
# NOTE(review): this copy of the block is damaged — the command decorators,
# the file name in both string literals and the statement that writes
# `new_content` back are missing. Restore from upstream before use.
def release_changelog_add(obj, version):
"""Prepend the changelog with the current release"""
from .release import Release
jira, repo = obj['jira'], obj['repo']
# just handle the current version
release = Release.from_jira(version, jira=jira, repo=repo)
if release.is_released:
raise ValueError('This version has been already released!')
changelog = release.changelog()
changelog_path = pathlib.Path(repo) / ''
current_content = changelog_path.read_text()
# New entry first, previous content after it.
new_content = changelog.render('markdown') + current_content
click.echo(" is updated!")
# Changelog "generate" sub-command: builds the changelog of `version`;
# `output` is a writable text file that defaults to stdout ('-').
# NOTE(review): the command decorators are missing in this copy, and neither
# `changelog` nor `output` is used after this point in the visible lines —
# presumably the statement that writes the rendered changelog was lost.
@click.argument('output', type=click.File('w', encoding='utf8'), default='-')
def release_changelog_generate(obj, version, output):
"""Generate the changelog of a specific release."""
from .release import Release
jira, repo = obj['jira'], obj['repo']
# just handle the current version
release = Release.from_jira(version, jira=jira, repo=repo)
changelog = release.changelog()
# Changelog "regenerate" sub-command: walks every version reported by
# `jira.arrow_versions()`, builds a Release for it and rewrites the changelog
# file from the collected changelogs.
# NOTE(review): this copy of the block is damaged — the command decorators,
# the body of `if not version.released:`, the statement that fills
# `changelogs`, the file name, the `with ... open` expression and the body of
# the final loop are missing or garbled. Restore from upstream before use.
def release_changelog_regenerate(obj):
"""Regeneretate the whole file"""
from .release import Release
jira, repo = obj['jira'], obj['repo']
changelogs = []
for version in jira.arrow_versions():
if not version.released:
release = Release.from_jira(version, jira=jira, repo=repo)
click.echo('Querying changelog for version: {}'.format(version))
click.echo('Rendering new file...')
changelog_path = pathlib.Path(repo) / ''
with'w') as fp:
for cl in changelogs:
# Cherry-pick sub-command: only valid for minor and patch releases (anything
# else raises click.UsageError). It echoes one `git cherry-pick <sha>` line per
# commit returned by `release.commits_to_pick()`.
# NOTE(review): this copy of the block is damaged — the command decorators,
# the end of the --recreate help string, the docstring quotes, the statement
# executed when not in dry-run mode and the call wrapping the
# 'git checkout ...' string are missing. Restore from upstream before use.
@click.option('--dry-run/--execute', default=True,
help="Display the git commands instead of executing them.")
@click.option('--recreate/--continue', default=True,
help="Recreate the maintenance branch or only apply unapplied "
def release_cherry_pick(obj, version, dry_run, recreate):
Cherry pick commits.
from .release import Release, MinorRelease, PatchRelease
release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
if not isinstance(release, (MinorRelease, PatchRelease)):
raise click.UsageError('Cherry-pick command only supported for minor '
'and patch releases')
if not dry_run:
click.echo('Executed the following commands:\n')
'git checkout {} -b {}'.format(release.previous.tag, release.branch)
for commit in release.commits_to_pick():
click.echo('git cherry-pick {}'.format(commit.hexsha))
# Optional crossbow integration: import the real `crossbow` command, and if
# that fails with ImportError define a placeholder `crossbow` that raises a
# click.ClickException naming the missing dependency.
# NOTE(review): this copy of the block is damaged — the `try:` line, the
# right-hand side of `missing_package =`, the decorator call around
# `context_settings=...`, the end of the exception message and the body of the
# `__main__` guard are missing. Restore from upstream before use.
from .crossbow.cli import crossbow # noqa
except ImportError as exc:
missing_package =
context_settings={"ignore_unknown_options": True}
def crossbow():
raise click.ClickException(
"Couldn't import crossbow because of missing dependency: {}"
if __name__ == "__main__":