| #!/usr/bin/env bash |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| |
| # Requirements |
| # - Ruby >= 2.3 |
| # - gcc >= 4.8 |
| # - Go >= 1.22 |
| # - Docker |
| # |
| # If using a non-system Boost, set BOOST_ROOT and add Boost libraries to |
| # LD_LIBRARY_PATH. |
| # |
# To reuse build artifacts between runs, set the ARROW_TMPDIR environment
# variable to a directory where the temporary files should be placed. Note that
# this directory is not cleaned up automatically.
| |
# Stop at the first failing command and treat a pipeline as failed when any
# of its stages fails.
set -eo pipefail

# VERBOSE=<n> with n > 0 enables command tracing for the rest of the run.
if [ "${VERBOSE:-0}" -gt 0 ]; then
  set -x
fi
| |
# Pick the verification mode from the number of positional arguments:
#   0 arguments -> verify this checkout ("local")
#   1 argument  -> verify a git revision ("git")
#   2 arguments -> verify a release candidate tarball ("tarball")
# Any other count prints the usage text and exits with status 1.
if [ $# -eq 0 ]; then
  VERSION="HEAD"
  SOURCE_KIND="local"
  TEST_BINARIES=0
elif [ $# -eq 1 ]; then
  VERSION="$1"
  SOURCE_KIND="git"
  TEST_BINARIES=0
elif [ $# -eq 2 ]; then
  VERSION="$1"
  RC_NUMBER="$2"
  SOURCE_KIND="tarball"
else
  printf '%s\n' \
    "Usage:" \
    " Verify release candidate:" \
    " $0 X.Y.Z RC_NUMBER" \
    " Verify only the source distribution:" \
    " TEST_DEFAULT=0 TEST_SOURCE=1 $0 X.Y.Z RC_NUMBER" \
    " Verify only the binary distributions:" \
    " TEST_DEFAULT=0 TEST_BINARIES=1 $0 X.Y.Z RC_NUMBER" \
    " Verify only the wheels:" \
    " TEST_DEFAULT=0 TEST_WHEELS=1 $0 X.Y.Z RC_NUMBER" \
    "" \
    " Run the source verification tasks on a remote git revision:" \
    " $0 GIT-REF" \
    " Run the source verification tasks on this arrow checkout:" \
    " $0"
  exit 1
fi
| |
# SOURCE_DIR is the directory containing this script and ARROW_DIR is the
# directory two levels above it. Both describe the verification tooling itself
# and can differ from the ARROW_SOURCE_DIR chosen in ensure_source_directory().
SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
ARROW_DIR="$(cd "${SOURCE_DIR}/../.." && pwd)"

# GitHub repository used for release artifacts unless the caller overrides it.
GITHUB_REPOSITORY="${GITHUB_REPOSITORY:-apache/arrow}"
| |
# Print a section title framed by rules of '=' characters.
# Arguments: $1 - title text
# Outputs:   an empty line, a rule as long as the title, the title, and the
#            same rule again, all on stdout
show_header() {
  local rule
  # Pad an empty string to the title length and turn the padding into '='.
  # Unlike `seq ${#1}`, this yields an empty rule for an empty title and does
  # not fork an external command.
  printf -v rule '%*s' "${#1}" ''
  rule="${rule// /=}"
  printf '\n%s\n%s\n%s\n' "${rule}" "${1}" "${rule}"
}
| |
# Print a one-line progress note prefixed with a corner marker.
# Arguments: $1 - message text
show_info() {
  printf '└ %s\n' "${1}"
}
| |
# Report whether a usable CUDA setup is present.
# Outputs: a header and a short status line on stdout
# Returns: 0 when nvcc and nvidia-smi are available and nvidia-smi lists at
#          least one GPU, 1 otherwise
detect_cuda() {
  show_header "Detect CUDA"

  # `command -v` is a shell builtin, so this works even where `which` is not
  # installed.
  if ! command -v nvcc > /dev/null || ! command -v nvidia-smi > /dev/null; then
    echo "No devices found."
    return 1
  fi

  # Capture the listing separately so that a failing nvidia-smi is noticed
  # instead of having its error text counted as a GPU.
  local gpu_list
  if ! gpu_list="$(nvidia-smi --list-gpus)"; then
    echo "No devices found."
    return 1
  fi

  local n_gpus=0
  if [ -n "${gpu_list}" ]; then
    n_gpus="$(printf '%s\n' "${gpu_list}" | wc -l)"
    # Some wc implementations pad the count with blanks; normalize it.
    n_gpus=$((n_gpus))
  fi
  echo "Found ${n_gpus} GPU."
  if [ "${n_gpus}" -lt 1 ]; then
    return 1
  fi
  return 0
}
| |
# Base URL under which download_rc_file() looks for release candidate files.
ARROW_RC_URL='https://dist.apache.org/repos/dist/dev/arrow'
# URL of the KEYS file that import_gpg_keys() downloads and imports into gpg.
ARROW_KEYS_URL='https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/KEYS'
| |
# Download a URL to a file with curl, following redirects and failing on HTTP
# errors.
# Arguments: $1 - URL to fetch
#            $2 - path of the output file
download_file() {
  local url="$1"
  local destination="$2"
  local -a curl_args=(
    --silent
    --show-error
    --fail
    --location
    --output "${destination}"
    "${url}"
  )
  curl "${curl_args[@]}"
}
| |
# Download one file of the current release candidate into the working
# directory, keeping its name.
# Globals:   ARROW_RC_URL, VERSION, RC_NUMBER (read)
# Arguments: $1 - file name inside the release candidate directory
download_rc_file() {
  local file_name="$1"
  local rc_dir="apache-arrow-${VERSION}-rc${RC_NUMBER}"
  download_file "${ARROW_RC_URL}/${rc_dir}/${file_name}" "${file_name}"
}
| |
# Download the KEYS file and import it into gpg, at most once per run.
# Globals: ARROW_KEYS_URL (read), GPGKEYS_ALREADY_IMPORTED (read and written)
import_gpg_keys() {
  # Anything other than a positive GPGKEYS_ALREADY_IMPORTED means the keys
  # still have to be fetched.
  if ! [ "${GPGKEYS_ALREADY_IMPORTED:-0}" -gt 0 ]; then
    download_file "${ARROW_KEYS_URL}" KEYS
    gpg --import KEYS

    GPGKEYS_ALREADY_IMPORTED=1
  fi
}
| |
# Checksum verification commands, stored as strings that callers expand
# unquoted. Start from the sha256sum/sha512sum pair and switch to shasum when
# that command is available.
sha256_verify="sha256sum -c"
sha512_verify="sha512sum -c"
if type shasum >/dev/null 2>&1; then
  sha256_verify="shasum -a 256 -c"
  sha512_verify="shasum -a 512 -c"
fi
| |
# Download a release candidate source archive together with its signature and
# checksum files into the current directory and verify all of them.
# Globals:   sha256_verify, sha512_verify (read; command strings)
# Arguments: $1 - archive base name without the ".tar.gz" suffix
# Returns:   non-zero as soon as a download or a verification step fails, even
#            when the caller runs without `set -e`
fetch_archive() {
  import_gpg_keys || return 1

  local dist_name="$1"
  local tarball="${dist_name}.tar.gz"
  local suffix
  for suffix in "" .asc .sha256 .sha512; do
    download_rc_file "${tarball}${suffix}" || return 1
  done
  gpg --verify "${tarball}.asc" "${tarball}" || return 1
  # The verify commands are "program plus options" strings, so they are
  # deliberately left unquoted to be split into words.
  ${sha256_verify} "${tarball}.sha256" || return 1
  ${sha512_verify} "${tarball}.sha512" || return 1
}
| |
# Verify the GPG signature and any accompanying checksum file of every
# artifact below a directory. An artifact is any file that has a sibling
# "<artifact>.asc" signature file.
# Globals:   sha256_verify, sha512_verify (read; command strings)
# Arguments: $1 - directory to scan recursively
# Returns:   non-zero when a signature or checksum does not verify
verify_dir_artifact_signatures() {
  import_gpg_keys || return 1

  local sigfile artifact artifact_dir base_artifact
  # NUL-delimited names keep paths with blanks or backslashes intact. The
  # loop runs in an explicit subshell so that `exit` only ends the loop.
  find "$1" -name '*.asc' -print0 | (
    while IFS= read -r -d '' sigfile; do
      # Strip only the trailing ".asc"; a ".asc" elsewhere in the path must
      # stay untouched.
      artifact="${sigfile%.asc}"
      gpg --verify "${sigfile}" "${artifact}" || exit 1

      # go into the directory because the checksum files contain only the
      # basename of the artifact
      artifact_dir="$(dirname "${artifact}")"
      base_artifact="$(basename "${artifact}")"
      (
        cd "${artifact_dir}" || exit 1
        if [ -f "${base_artifact}.sha256" ]; then
          ${sha256_verify} "${base_artifact}.sha256" || exit 1
        fi
        if [ -f "${base_artifact}.sha512" ]; then
          ${sha512_verify} "${base_artifact}.sha512" || exit 1
        fi
      ) || exit 1
    done
  )
}
| |
# Download all binary artifacts of the release candidate and verify their
# checksums and signatures.
# Globals: ARROW_TMPDIR, SOURCE_DIR, VERSION, RC_NUMBER, GITHUB_REPOSITORY,
#          PYTHON (read)
test_binary() {
  show_header "Testing binary artifacts"
  maybe_setup_conda

  # Paths are quoted so that a sandbox or checkout path with blanks works.
  local download_dir="${ARROW_TMPDIR}/binaries"
  mkdir -p "${download_dir}"

  # PYTHON stays unquoted so that a value such as "python3 -u" keeps working.
  ${PYTHON:-python3} "${SOURCE_DIR}/download_rc_binaries.py" "${VERSION}" "${RC_NUMBER}" \
    --dest="${download_dir}" \
    --repository="${GITHUB_REPOSITORY}" \
    --tag="apache-arrow-${VERSION}-rc${RC_NUMBER}"

  verify_dir_artifact_signatures "${download_dir}"
}
| |
# Check the most recent verify_rc.yml workflow run for the release candidate
# branch on GitHub and abort the script unless it concluded successfully.
# Globals: ARROW_TMPDIR, VERSION, RC_NUMBER (read)
# Outputs: writes the run description to ${ARROW_TMPDIR}/latest_verify_rc.json;
#          prints the conclusion and run URL on stdout when it is not a success
# Exits:   with status 1 when the query fails or the run was not successful
check_verification_result_on_github() {
  # An absolute path avoids changing the working directory and keeps working
  # when ARROW_TMPDIR contains blanks.
  local result_json="${ARROW_TMPDIR}/latest_verify_rc.json"
  local conclusion html_url

  # --fail makes an HTTP error a curl failure instead of an error document
  # that jq would turn into "null".
  curl \
    --fail \
    --silent \
    --show-error \
    --get \
    --data "branch=apache-arrow-${VERSION}-rc${RC_NUMBER}" \
    "https://api.github.com/repos/apache/arrow/actions/workflows/verify_rc.yml/runs" \
    | jq '.workflow_runs[0]' > "${result_json}" || exit 1

  conclusion="$(jq -r '.conclusion' "${result_json}")"
  if [ "${conclusion}" != "success" ]; then
    html_url="$(jq -r '.html_url' "${result_json}")"
    echo "Verification on GitHub wasn't successful: ${conclusion}: ${html_url}"
    exit 1
  fi
}
| |
# Verify the APT repository of the release candidate.
# Outside GitHub Actions this only checks the result of the verification
# workflow on GitHub. Inside GitHub Actions it runs verify-apt.sh in a Docker
# container for every supported distribution of the host architecture.
# Globals: GITHUB_ACTIONS, ARROW_DIR, VERSION (read)
# Exits:   with status 1 as soon as one container reports a failure
test_apt() {
  show_header "Testing APT packages"

  if [ "${GITHUB_ACTIONS}" != "true" ]; then
    check_verification_result_on_github
    return 0
  fi

  # Per-architecture settings; any other architecture is skipped.
  local docker_platform image_prefix arch_label
  case "$(arch)" in
    "x86_64")
      docker_platform="linux/x86_64"
      image_prefix=""
      arch_label="x86_64"
      ;;
    "aarch64")
      docker_platform="linux/arm64"
      image_prefix="arm64v8/"
      arch_label="arm64"
      ;;
    *)
      return 0
      ;;
  esac

  local distribution image
  for distribution in \
    "debian:bookworm" \
    "debian:trixie" \
    "debian:forky" \
    "ubuntu:jammy" \
    "ubuntu:noble"; do
    image="${image_prefix}${distribution}"
    if ! docker run \
      --platform="${docker_platform}" \
      --rm \
      --security-opt="seccomp=unconfined" \
      --volume "${ARROW_DIR}":/arrow:delegated \
      "${image}" \
      /arrow/dev/release/verify-apt.sh \
      "${VERSION}" \
      "rc"; then
      echo "Failed to verify the APT repository for ${image} on ${arch_label}"
      exit 1
    fi
  done
}
| |
# Verify the Yum repository of the release candidate.
# Outside GitHub Actions this only checks the result of the verification
# workflow on GitHub. Inside GitHub Actions it runs verify-yum.sh in a Docker
# container for every supported distribution of the host architecture.
# Globals: GITHUB_ACTIONS, ARROW_DIR, VERSION (read)
# Exits:   with status 1 as soon as one container reports a failure
test_yum() {
  show_header "Testing Yum packages"

  if [ "${GITHUB_ACTIONS}" != "true" ]; then
    check_verification_result_on_github
    return 0
  fi

  # Per-architecture settings; any other architecture is skipped.
  local docker_platform image_prefix arch_label
  case "$(arch)" in
    "x86_64")
      docker_platform="linux/x86_64"
      image_prefix=""
      arch_label="x86_64"
      ;;
    "aarch64")
      docker_platform="linux/arm64"
      image_prefix="arm64v8/"
      arch_label="arm64"
      ;;
    *)
      return 0
      ;;
  esac

  # The CentOS Stream image name carries no architecture prefix.
  local -a images=(
    "${image_prefix}almalinux:10"
    "${image_prefix}almalinux:9"
    "${image_prefix}almalinux:8"
    "${image_prefix}amazonlinux:2023"
    "quay.io/centos/centos:stream9"
  )

  local image
  for image in "${images[@]}"; do
    if ! docker run \
      --platform "${docker_platform}" \
      --rm \
      --security-opt="seccomp=unconfined" \
      --volume "${ARROW_DIR}":/arrow:delegated \
      "${image}" \
      /arrow/dev/release/verify-yum.sh \
      "${VERSION}" \
      "rc"; then
      echo "Failed to verify the Yum repository for ${image} on ${arch_label}"
      exit 1
    fi
  done
}
| |
# Prepare the sandbox directory used by all verification steps.
# Globals: ARROW_TMPDIR (read; written when it was empty), VERSION (read),
#          TEST_SUCCESS (read by the exit handler)
# Outputs: a header and the sandbox location on stdout
setup_tempdir() {
  # Exit handler: remove the sandbox after a successful run, otherwise keep
  # it and say where it is.
  cleanup() {
    if [ "${TEST_SUCCESS}" != "yes" ]; then
      echo "Failed to verify release candidate. See ${ARROW_TMPDIR} for details."
    else
      rm -fr "${ARROW_TMPDIR}"
    fi
  }

  show_header "Creating temporary directory"

  if [ -n "${ARROW_TMPDIR}" ]; then
    # a caller-provided directory is never cleaned up automatically
    mkdir -p "${ARROW_TMPDIR}"
  else
    # a directory created here is cleaned up by the exit handler
    ARROW_TMPDIR=$(mktemp -d -t "arrow-${VERSION}.XXXXX")
    trap cleanup EXIT
  fi

  echo "Working in sandbox ${ARROW_TMPDIR}"
}
| |
# Make a Miniforge installation available below the sandbox and activate its
# base environment in the current shell.
# Globals: ARROW_TMPDIR (read), CONDA_ALREADY_INSTALLED (read and written)
# Returns: non-zero when the installer cannot be downloaded or run
install_conda() {
  # Setup short-lived miniconda for Python and integration tests
  show_info "Ensuring that Conda is installed..."
  local prefix="${ARROW_TMPDIR}/miniforge"

  # Setup miniconda only if the directory doesn't exist yet
  if [ "${CONDA_ALREADY_INSTALLED:-0}" -eq 0 ]; then
    if [ ! -d "${prefix}" ]; then
      show_info "Installing miniconda at ${prefix}..."
      local arch platform
      arch="$(uname -m)"
      platform="$(uname)"
      local url="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-${platform}-${arch}.sh"
      # Keep the installer inside the sandbox instead of the current
      # directory, and fetch it with download_file so that an HTTP error
      # aborts instead of an error page being executed.
      local installer="${ARROW_TMPDIR}/miniconda.sh"
      download_file "${url}" "${installer}" || return 1
      bash "${installer}" -b -p "${prefix}" || return 1
      rm -f "${installer}"
    else
      show_info "Miniconda already installed at ${prefix}"
    fi
  else
    show_info "Conda installed at ${prefix}"
  fi
  CONDA_ALREADY_INSTALLED=1

  # Load conda's shell integration and activate the base environment
  . "${prefix}/etc/profile.d/conda.sh"
  conda activate base
}
| |
# When USE_CONDA is enabled, create (if needed) and activate the Conda
# environment "conda-${CONDA_ENV:-source}" and install the given packages
# into it. When USE_CONDA is disabled, only make sure that no Conda
# environment is active.
# Globals:   USE_CONDA, CONDA_ENV, PYTHON_VERSION, CONDA_PREFIX (read)
# Arguments: package specifications or options passed to `mamba install`
# Returns:   1 when a Conda environment is active although USE_CONDA is 0
maybe_setup_conda() {
  local env="conda-${CONDA_ENV:-source}"
  local pyver="${PYTHON_VERSION:-3}"

  if [ "${USE_CONDA:-0}" -gt 0 ]; then
    show_info "Configuring Conda environment..."

    # Deactivate previous env
    if [ -n "${CONDA_PREFIX:-}" ]; then
      conda deactivate || :
    fi
    # Ensure that conda is installed
    install_conda
    # Create environment. The name is compared as a fixed string against the
    # whole first column, so "conda-source" no longer matches
    # "conda-source2" and dots in the name are not regex wildcards. grep
    # reads all of its input (no -q) so the producers never see a closed
    # pipe under `pipefail`.
    if ! conda env list | cut -d" " -f 1 | grep -Fx -- "${env}" > /dev/null; then
      mamba create -y -n "${env}" "python=${pyver}"
    fi
    # Install dependencies
    if [ $# -gt 0 ]; then
      mamba install -y -n "${env}" "$@"
    fi
    # Activate the environment
    conda activate "${env}"
  elif [ -n "${CONDA_PREFIX:-}" ]; then
    echo "Conda environment is active despite that USE_CONDA is set to 0."
    echo "Deactivate the environment using \`conda deactivate\` before running the verification script."
    return 1
  fi
}
| |
# When USE_CONDA is 0, create (if needed) and activate the virtualenv
# "${ARROW_TMPDIR}/venv-${VENV_ENV:-source}" and install the given packages
# with pip. Does nothing when USE_CONDA is enabled.
# Globals:   USE_CONDA, VENV_ENV, PYTHON_VERSION, PYTHON, ARROW_TMPDIR,
#            CONDA_PREFIX (read)
# Arguments: packages to install with pip
# Returns:   1 when a Conda environment is active or the interpreter is missing
maybe_setup_virtualenv() {
  local env="venv-${VENV_ENV:-source}"
  local pyver="${PYTHON_VERSION:-3}"
  local python="${PYTHON:-"python${pyver}"}"
  local virtualenv="${ARROW_TMPDIR}/${env}"

  if [ "${USE_CONDA:-0}" -eq 0 ]; then
    show_info "Configuring Python ${pyver} virtualenv..."

    if [ -n "${CONDA_PREFIX:-}" ]; then
      echo "Conda environment is active despite that USE_CONDA is set to 0."
      echo "Deactivate the environment before running the verification script."
      return 1
    fi
    # Deactivate previous env
    if command -v deactivate &> /dev/null; then
      deactivate
    fi
    # Check that python interpreter exists
    if ! command -v "${python}" &> /dev/null; then
      echo "Couldn't locate python interpreter with version ${pyver}"
      echo "Call the script with USE_CONDA=1 to test all of the python versions."
      return 1
    else
      show_info "Found interpreter $("${python}" --version): $(command -v "${python}")"
    fi
    # Create environment
    if [ ! -d "${virtualenv}" ]; then
      show_info "Creating python virtualenv at ${virtualenv}..."
      "${python}" -m venv "${virtualenv}"
      # Activate the environment
      source "${virtualenv}/bin/activate"
      # Upgrade pip and setuptools
      pip install -U pip setuptools
    else
      show_info "Using already created virtualenv at ${virtualenv}"
      # Activate the environment
      source "${virtualenv}/bin/activate"
    fi
    # Install dependencies
    if [ $# -gt 0 ]; then
      # "$*" keeps the whole package list in the single argument that
      # show_info prints; "$@" would split it and drop all but the first.
      show_info "Installed pip packages $*..."
      pip install "$@"
    fi
  fi
}
| |
# Configure, build and install the C++ libraries into ARROW_HOME and run the
# C++ unit tests when TEST_CPP is enabled.
# Globals: USE_CONDA, ARROW_TMPDIR, ARROW_HOME, ARROW_SOURCE_DIR, NPROC,
#          TEST_CPP, TEST_INTEGRATION_CPP, CMAKE_GENERATOR,
#          ARROW_CMAKE_OPTIONS, ARROW_* feature switches (read);
#          DEFAULT_DEPENDENCY_SOURCE, CMAKE_PREFIX_PATH,
#          CMAKE_BUILD_PARALLEL_LEVEL (written)
test_and_install_cpp() {
  show_header "Build, install and test C++ libraries"

  # Build and test C++
  maybe_setup_virtualenv numpy
  maybe_setup_conda \
    --file ci/conda_env_unix.txt \
    --file ci/conda_env_cpp.txt \
    --file ci/conda_env_gandiva.txt \
    ncurses \
    numpy \
    sqlite \
    compilers

  if [ "${USE_CONDA}" -gt 0 ]; then
    DEFAULT_DEPENDENCY_SOURCE="CONDA"
    CMAKE_PREFIX_PATH="${CONDA_BACKUP_CMAKE_PREFIX_PATH}:${CMAKE_PREFIX_PATH}"
  else
    DEFAULT_DEPENDENCY_SOURCE="AUTO"
  fi

  mkdir -p "${ARROW_TMPDIR}/cpp-build"
  pushd "${ARROW_TMPDIR}/cpp-build"

  local ARROW_BUILD_INTEGRATION=OFF
  local ARROW_BUILD_TESTS=OFF
  if [ "${TEST_INTEGRATION_CPP}" -gt 0 ]; then
    ARROW_BUILD_INTEGRATION=ON
  fi
  if [ "${TEST_CPP}" -gt 0 ]; then
    ARROW_BUILD_TESTS=ON
  fi

  # Collect the arguments in an array so that every value stays a single
  # word, whatever it contains.
  local -a cmake_args=(
    -DARROW_BOOST_USE_SHARED=ON
    -DARROW_BUILD_EXAMPLES=OFF
    "-DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION}"
    "-DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS}"
    -DARROW_BUILD_UTILITIES=ON
    -DARROW_COMPUTE=ON
    -DARROW_CSV=ON
    "-DARROW_CUDA=${ARROW_CUDA}"
    -DARROW_DATASET=ON
    "-DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-$DEFAULT_DEPENDENCY_SOURCE}"
    -DARROW_FILESYSTEM=ON
    "-DARROW_FLIGHT=${ARROW_FLIGHT}"
    "-DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL}"
    "-DARROW_GANDIVA=${ARROW_GANDIVA}"
    "-DARROW_GCS=${ARROW_GCS}"
    -DARROW_HDFS=ON
    -DARROW_JSON=ON
    -DARROW_ORC=ON
    -DARROW_PARQUET=ON
    -DARROW_SUBSTRAIT=ON
    "-DARROW_S3=${ARROW_S3}"
    "-DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON}"
    -DARROW_VERBOSE_THIRDPARTY_BUILD=ON
    -DARROW_WITH_BROTLI=ON
    -DARROW_WITH_BZ2=ON
    -DARROW_WITH_LZ4=ON
    -DARROW_WITH_RE2=ON
    -DARROW_WITH_SNAPPY=ON
    -DARROW_WITH_UTF8PROC=ON
    -DARROW_WITH_ZLIB=ON
    -DARROW_WITH_ZSTD=ON
    "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-release}"
    -DCMAKE_INSTALL_LIBDIR=lib
    "-DCMAKE_INSTALL_PREFIX=${ARROW_HOME}"
    "-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF}"
    "-DGTest_SOURCE=${GTest_SOURCE:-BUNDLED}"
    -DPARQUET_BUILD_EXAMPLES=ON
    -DPARQUET_BUILD_EXECUTABLES=ON
    -DPARQUET_REQUIRE_ENCRYPTION=ON
  )
  # ARROW_CMAKE_OPTIONS is a caller-provided option string and is split into
  # words on purpose.
  # shellcheck disable=SC2206
  cmake_args+=(${ARROW_CMAKE_OPTIONS:-})
  # The generator is passed as one word so that names containing blanks,
  # such as "Unix Makefiles", work.
  if [ -n "${CMAKE_GENERATOR:-}" ]; then
    cmake_args+=(-G "${CMAKE_GENERATOR}")
  fi

  cmake "${cmake_args[@]}" "${ARROW_SOURCE_DIR}/cpp"
  export CMAKE_BUILD_PARALLEL_LEVEL="${CMAKE_BUILD_PARALLEL_LEVEL:-${NPROC}}"
  cmake --build . --target install

  if [ "${TEST_CPP}" -gt 0 ]; then
    LD_LIBRARY_PATH="${PWD}/release:${LD_LIBRARY_PATH}" ctest \
      --label-regex unittest \
      --output-on-failure \
      --parallel "${NPROC}" \
      --timeout 300
  fi

  popd
}
| |
# Build pyarrow in development mode from ./python, check that the mandatory
# and the enabled optional modules can be imported, and run the pyarrow test
# suite.
# Globals: USE_CONDA, NPROC, ARROW_CUDA, ARROW_FLIGHT, ARROW_GANDIVA,
#          ARROW_GCS, ARROW_S3 (read); PYARROW_* build switches (exported)
test_python() {
  show_header "Build and test Python libraries"

  # Build and test Python
  maybe_setup_virtualenv
  maybe_setup_conda --file ci/conda_env_python.txt

  if [ "${USE_CONDA}" -gt 0 ]; then
    CMAKE_PREFIX_PATH="${CONDA_BACKUP_CMAKE_PREFIX_PATH}:${CMAKE_PREFIX_PATH}"
  fi

  export PYARROW_PARALLEL="${NPROC}"
  export PYARROW_WITH_DATASET=1
  export PYARROW_WITH_HDFS=1
  export PYARROW_WITH_ORC=1
  export PYARROW_WITH_PARQUET=1
  export PYARROW_WITH_PARQUET_ENCRYPTION=1

  # Optional components as "<name>:<module>" pairs: ARROW_<name>=ON enables
  # PYARROW_WITH_<name> for the build and the import check of <module>.
  local optional_components="CUDA:pyarrow.cuda FLIGHT:pyarrow.flight GANDIVA:pyarrow.gandiva GCS:pyarrow._gcsfs S3:pyarrow._s3fs"
  local component_entry component_name component_switch

  for component_entry in ${optional_components}; do
    component_name="${component_entry%%:*}"
    component_switch="ARROW_${component_name}"
    if [ "${!component_switch}" = "ON" ]; then
      export "PYARROW_WITH_${component_name}=1"
    fi
  done

  pushd python

  # Build pyarrow
  python -m pip install -e .

  # Check mandatory and optional imports
  python -c "
import pyarrow
import pyarrow._hdfs
import pyarrow.csv
import pyarrow.dataset
import pyarrow.fs
import pyarrow.json
import pyarrow.orc
import pyarrow.parquet
"
  for component_entry in ${optional_components}; do
    component_switch="ARROW_${component_entry%%:*}"
    if [ "${!component_switch}" == "ON" ]; then
      python -c "import ${component_entry#*:}"
    fi
  done

  # Install test dependencies
  pip install -r requirements-test.txt

  # Execute pyarrow unittests
  pytest pyarrow -v

  popd
}
| |
# Build the C GLib bindings from ./c_glib with meson and ninja, install them
# into ARROW_HOME and run their Ruby-based test suite.
# Globals: ARROW_TMPDIR, ARROW_HOME, CMAKE_BUILD_TYPE (read);
#          GI_TYPELIB_PATH (exported)
test_glib() {
  show_header "Build and test C GLib libraries"

  # Build and test C GLib
  maybe_setup_conda glib gobject-introspection meson ninja ruby
  maybe_setup_virtualenv meson

  # Install bundler if doesn't exist
  if ! bundle --version; then
    gem install --no-document bundler
  fi

  # Quoted so that a sandbox path with blanks stays one argument.
  local build_dir="${ARROW_TMPDIR}/c-glib-build"
  mkdir -p "${build_dir}"

  pushd c_glib

  # Build the C GLib bindings
  meson \
    --buildtype="${CMAKE_BUILD_TYPE:-release}" \
    --libdir=lib \
    --prefix="${ARROW_HOME}" \
    "${build_dir}"
  ninja -C "${build_dir}"
  ninja -C "${build_dir}" install

  # Test the C GLib bindings. The previous value is appended only when there
  # is one, so the search path never ends in an empty entry.
  export GI_TYPELIB_PATH="${ARROW_HOME}/lib/girepository-1.0${GI_TYPELIB_PATH:+:${GI_TYPELIB_PATH}}"
  bundle config set --local path 'vendor/bundle'
  bundle install
  bundle exec ruby test/run-test.rb

  popd
}
| |
# Run the test suites of the Ruby gems below ./ruby. The optional gems are
# included according to the ARROW_CUDA, ARROW_FLIGHT, ARROW_FLIGHT_SQL and
# ARROW_GANDIVA switches.
test_ruby() {
  show_header "Build and test Ruby libraries"

  # required dependencies are installed by test_glib
  maybe_setup_conda
  maybe_setup_virtualenv

  which ruby
  which bundle

  pushd ruby

  # Always-tested gems first, then the optional ones as "<switch>:<gem>"
  # pairs in the order they are run.
  local -a gems=(red-arrow red-arrow-dataset red-parquet)
  local optional_gem switch_name
  for optional_gem in \
    CUDA:red-arrow-cuda \
    FLIGHT:red-arrow-flight \
    FLIGHT_SQL:red-arrow-flight-sql \
    GANDIVA:red-gandiva; do
    switch_name="ARROW_${optional_gem%%:*}"
    if [ "${!switch_name}" = "ON" ]; then
      gems+=("${optional_gem#*:}")
    fi
  done

  local gem_dir
  for gem_dir in "${gems[@]}"; do
    pushd ${gem_dir}
    bundle config set --local path 'vendor/bundle'
    bundle install
    bundle exec ruby test/run-test.rb
    popd
  done

  popd
}
| |
# Run the integration tests through archery.
# Globals: ARROW_TMPDIR, ARROW_FLIGHT, TEST_INTEGRATION_CPP, LD_LIBRARY_PATH
#          (read); CPP_BUILD_DIR, INTEGRATION_TEST_ARGS (written);
#          ARROW_CPP_EXE_PATH (exported)
test_integration() {
  show_header "Build and execute integration tests"

  maybe_setup_conda
  maybe_setup_virtualenv

  # Quoted: the square brackets would otherwise be a glob pattern.
  pip install -e "dev/archery[integration]"

  CPP_BUILD_DIR="${ARROW_TMPDIR}/cpp-build"

  export ARROW_CPP_EXE_PATH="${CPP_BUILD_DIR}/release"

  # The Flight tests are requested only when Flight is enabled. Passing
  # --run-flight unconditionally as well made this switch ineffective.
  INTEGRATION_TEST_ARGS=""
  if [ "${ARROW_FLIGHT}" = "ON" ]; then
    INTEGRATION_TEST_ARGS="${INTEGRATION_TEST_ARGS} --run-flight"
  fi

  # Flight integration test executable have runtime dependency on release/libgtest.so
  # INTEGRATION_TEST_ARGS is an option string and is split into words on
  # purpose.
  LD_LIBRARY_PATH="${ARROW_CPP_EXE_PATH}:${LD_LIBRARY_PATH}" archery integration \
    --run-ipc --run-c-data \
    --with-cpp="${TEST_INTEGRATION_CPP}" \
    ${INTEGRATION_TEST_ARGS}
}
| |
# Make the source tree selected by SOURCE_KIND available, prepare the test
# data repositories inside it, and export the variables that point to them.
#   local   - use ARROW_SOURCE_DIR if set, otherwise ARROW_DIR
#   git     - clone SOURCE_REPOSITORY into the sandbox and check out VERSION
#   other   - download, verify and unpack the release candidate tarball
# Globals: SOURCE_KIND, VERSION, RC_NUMBER, ARROW_DIR, ARROW_TMPDIR,
#          SOURCE_DIR, SOURCE_REPOSITORY (read); dist_name (written);
#          ARROW_SOURCE_DIR, ARROW_TEST_DATA, PARQUET_TEST_DATA,
#          ARROW_GDB_SCRIPT (exported)
ensure_source_directory() {
  show_header "Ensuring source directory"

  dist_name="apache-arrow-${VERSION}"
  local revision

  if [ "${SOURCE_KIND}" = "local" ]; then
    # Local arrow repository, testing repositories should be already present
    if [ -z "${ARROW_SOURCE_DIR:-}" ]; then
      export ARROW_SOURCE_DIR="${ARROW_DIR}"
    fi
    echo "Verifying local Arrow checkout at ${ARROW_SOURCE_DIR}"
  elif [ "${SOURCE_KIND}" = "git" ]; then
    # Remote arrow repository, testing repositories must be cloned
    : "${SOURCE_REPOSITORY:=https://github.com/apache/arrow}"
    # Something shaped like X.Y.Z is taken as a release version and mapped
    # to its tag; anything else is used as the revision itself.
    case "${VERSION}" in
      *.*.*)
        revision="apache-arrow-${VERSION}"
        ;;
      *)
        revision="${VERSION}"
        ;;
    esac
    echo "Verifying Arrow repository ${SOURCE_REPOSITORY} with revision checkout ${revision}"
    export ARROW_SOURCE_DIR="${ARROW_TMPDIR}/arrow"
    if [ ! -d "${ARROW_SOURCE_DIR}" ]; then
      git clone --recurse-submodules "${SOURCE_REPOSITORY}" "${ARROW_SOURCE_DIR}"
      git -C "${ARROW_SOURCE_DIR}" checkout "${revision}"
    fi
  else
    # Release tarball, testing repositories must be cloned separately
    echo "Verifying official Arrow release candidate ${VERSION}-rc${RC_NUMBER}"
    export ARROW_SOURCE_DIR="${ARROW_TMPDIR}/${dist_name}"
    if [ ! -d "${ARROW_SOURCE_DIR}" ]; then
      pushd "${ARROW_TMPDIR}"
      fetch_archive "${dist_name}"
      tar xf "${dist_name}.tar.gz"
      popd
    fi
  fi

  # Ensure that the testing repositories are prepared. Every path is quoted:
  # an unquoted path with blanks makes `[ -d ... ]` fail with an error, which
  # silently skipped this preparation.
  local bundled_testing="${SOURCE_DIR}/../../testing"
  if [ ! -d "${ARROW_SOURCE_DIR}/testing/data" ]; then
    if [ -d "${bundled_testing}/data" ]; then
      cp -a "${bundled_testing}" "${ARROW_SOURCE_DIR}/"
    else
      git clone \
        https://github.com/apache/arrow-testing.git \
        "${ARROW_SOURCE_DIR}/testing"
    fi
  fi
  local bundled_parquet_testing="${SOURCE_DIR}/../../cpp/submodules/parquet-testing"
  if [ ! -d "${ARROW_SOURCE_DIR}/cpp/submodules/parquet-testing/data" ]; then
    if [ -d "${bundled_parquet_testing}/data" ]; then
      cp -a \
        "${bundled_parquet_testing}" \
        "${ARROW_SOURCE_DIR}/cpp/submodules/"
    else
      git clone \
        https://github.com/apache/parquet-testing.git \
        "${ARROW_SOURCE_DIR}/cpp/submodules/parquet-testing"
    fi
  fi

  export ARROW_TEST_DATA="${ARROW_SOURCE_DIR}/testing/data"
  export PARQUET_TEST_DATA="${ARROW_SOURCE_DIR}/cpp/submodules/parquet-testing/data"
  export ARROW_GDB_SCRIPT="${ARROW_SOURCE_DIR}/cpp/gdb_arrow.py"
}
| |
# Run the enabled source verification tasks from inside ARROW_SOURCE_DIR,
# after pointing the build environment at an install prefix in the sandbox.
# Globals: ARROW_TMPDIR, ARROW_SOURCE_DIR, SOURCE_KIND,
#          TEST_SOURCE_REPRODUCIBLE, GITHUB_REPOSITORY, VERSION, RC_NUMBER,
#          BUILD_CPP, TEST_PYTHON, TEST_GLIB, TEST_RUBY, TEST_INTEGRATION
#          (read); NPROC, tarball (written); ARROW_HOME, PARQUET_HOME,
#          CMAKE_PREFIX_PATH, PKG_CONFIG_PATH and the platform's library
#          search path (exported)
# Returns: 1 when the reproducibility check finds a differing source archive
test_source_distribution() {
  export ARROW_HOME="${ARROW_TMPDIR}/install"
  export CMAKE_PREFIX_PATH="${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}"
  export PARQUET_HOME="${ARROW_TMPDIR}/install"
  export PKG_CONFIG_PATH="${ARROW_HOME}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"

  # CPU count and library search path are set per platform.
  case "$(uname)" in
    Darwin)
      NPROC=$(sysctl -n hw.ncpu)
      export DYLD_LIBRARY_PATH="${ARROW_HOME}/lib:${DYLD_LIBRARY_PATH:-}"
      ;;
    *)
      NPROC=$(nproc)
      export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH:-}"
      ;;
  esac

  pushd $ARROW_SOURCE_DIR

  # Optionally rebuild the source archive from a fresh clone and compare it
  # byte for byte with the archive in the parent directory.
  if [ "${SOURCE_KIND}" = "tarball" ] && [ "${TEST_SOURCE_REPRODUCIBLE}" -gt 0 ]; then
    pushd ..
    git clone "https://github.com/${GITHUB_REPOSITORY}.git" arrow
    pushd arrow
    dev/release/utils-create-release-tarball.sh "${VERSION}" "${RC_NUMBER}"
    tarball="apache-arrow-${VERSION}.tar.gz"
    if ! cmp "${tarball}" "../${tarball}"; then
      echo "Source archive isn't reproducible"
      # Mention missing GNU tools as a hint for the mismatch.
      if ! { tar --version | grep --quiet --fixed GNU || \
             gtar --version | grep --quiet --fixed GNU; }; then
        echo "We need GNU tar to verify reproducible build"
      fi
      if ! gzip --version | grep --quiet --fixed GNU; then
        echo "We need GNU gzip to verify reproducible build"
      fi
      return 1
    fi
    popd
    popd
  fi

  # "<switch>:<function>" pairs, run in this order when the switch is
  # greater than zero.
  local _stage _stage_switch _stage_function
  for _stage in \
    BUILD_CPP:test_and_install_cpp \
    TEST_PYTHON:test_python \
    TEST_GLIB:test_glib \
    TEST_RUBY:test_ruby \
    TEST_INTEGRATION:test_integration; do
    _stage_switch="${_stage%%:*}"
    _stage_function="${_stage#*:}"
    if [ ${!_stage_switch} -gt 0 ]; then
      "${_stage_function}"
    fi
  done

  popd
}
| |
# Run the enabled binary verification tasks in a fixed order.
# Globals: TEST_BINARY, TEST_APT, TEST_YUM, TEST_WHEELS (read)
test_binary_distribution() {
  # "<switch>:<function>" pairs; a function runs when its switch is greater
  # than zero.
  local _task _task_switch _task_function
  for _task in \
    TEST_BINARY:test_binary \
    TEST_APT:test_apt \
    TEST_YUM:test_yum \
    TEST_WHEELS:test_wheels; do
    _task_switch="${_task%%:*}"
    _task_function="${_task#*:}"
    if [ ${!_task_switch} -gt 0 ]; then
      "${_task_function}"
    fi
  done
}
| |
# Install and test the Linux wheels found in the current directory, one per
# Python version and platform tag.
# Globals: TEST_PYTHON_VERSIONS, TEST_WHEEL_PLATFORM_TAGS,
#          TEST_PYARROW_VERSION, VERSION, SOURCE_KIND, ARROW_DIR,
#          ARROW_SOURCE_DIR (read)
test_linux_wheels() {
  local check_gcs=OFF

  local arch="x86_64"
  if [ "$(uname -m)" = "aarch64" ]; then
    arch="aarch64"
  fi

  local python_versions="${TEST_PYTHON_VERSIONS:-3.10 3.11 3.12 3.13 3.14}"
  local platform_tags="${TEST_WHEEL_PLATFORM_TAGS:-manylinux_2_28_${arch}}"

  # The wheel content check defaults to ON and is switched off for anything
  # but a local checkout.
  local wheel_content="${wheel_content:-ON}"
  if [ "${SOURCE_KIND}" != "local" ]; then
    wheel_content="OFF"
  fi

  # The version check is enabled only for release candidate tarballs.
  local check_version="OFF"
  if [ "${SOURCE_KIND}" = "tarball" ]; then
    check_version="ON"
  fi

  local python platform pyver wheel
  for python in ${python_versions}; do
    pyver=${python/m}
    for platform in ${platform_tags}; do
      show_header "Testing Python ${pyver} wheel for platform ${platform}"
      CONDA_ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_conda tzdata
      # Skip this combination when no environment could be set up for it.
      if ! VENV_ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_virtualenv; then
        continue
      fi
      wheel="pyarrow-${TEST_PYARROW_VERSION:-${VERSION}}-cp${pyver/.}-cp${python/.}-${platform}.whl"
      pip install ${wheel}
      ARROW_GCS=${check_gcs} \
        ARROW_VERSION=${VERSION} \
        CHECK_VERSION=${check_version} \
        CHECK_WHEEL_CONTENT=${wheel_content} \
        INSTALL_PYARROW=OFF \
        ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_SOURCE_DIR}
    done
  done
}
| |
# Install and test the macOS wheels found in the current directory, one per
# Python version and platform tag.
# Like test_linux_wheels, this honors TEST_PYTHON_VERSIONS,
# TEST_WHEEL_PLATFORM_TAGS and TEST_PYARROW_VERSION; without them the
# defaults are Python 3.10 to 3.14 and the macosx_12_0 tag of the host
# architecture.
# Globals: TEST_PYTHON_VERSIONS, TEST_WHEEL_PLATFORM_TAGS,
#          TEST_PYARROW_VERSION, VERSION, SOURCE_KIND, ARROW_DIR,
#          ARROW_SOURCE_DIR (read)
test_macos_wheels() {
  local check_flight=ON
  local default_platform_tags="macosx_12_0_x86_64"

  # apple silicon processor
  if [ "$(uname -m)" = "arm64" ]; then
    default_platform_tags="macosx_12_0_arm64"
    check_flight=OFF
  fi

  local python_versions="${TEST_PYTHON_VERSIONS:-3.10 3.11 3.12 3.13 3.14}"
  local platform_tags="${TEST_WHEEL_PLATFORM_TAGS:-${default_platform_tags}}"

  # The wheel content check defaults to ON and is switched off for anything
  # but a local checkout.
  local wheel_content="${wheel_content:-ON}"
  if [ "${SOURCE_KIND}" != "local" ]; then
    wheel_content="OFF"
  fi

  # The version check is enabled only for release candidate tarballs.
  local check_version="OFF"
  if [ "${SOURCE_KIND}" = "tarball" ]; then
    check_version="ON"
  fi

  # verify arch-native wheels inside an arch-native conda environment
  local python platform pyver wheel check_gcs check_s3
  for python in ${python_versions}; do
    pyver="${python/m}"
    for platform in ${platform_tags}; do
      show_header "Testing Python ${pyver} wheel for platform ${platform}"
      # Decide the storage checks per platform so that a "10_9" tag does
      # not switch S3 off for the platforms that follow it.
      check_gcs=OFF
      check_s3=ON
      if [[ "${platform}" == *"10_9"* ]]; then
        check_s3=OFF
      fi

      CONDA_ENV="wheel-${pyver}-${platform}" PYTHON_VERSION="${pyver}" maybe_setup_conda
      # Skip this combination when no environment could be set up for it.
      if ! VENV_ENV="wheel-${pyver}-${platform}" PYTHON_VERSION="${pyver}" maybe_setup_virtualenv; then
        continue
      fi

      wheel="pyarrow-${TEST_PYARROW_VERSION:-${VERSION}}-cp${pyver/.}-cp${python/.}-${platform}.whl"
      pip install "${wheel}"
      ARROW_FLIGHT="${check_flight}" \
        ARROW_GCS="${check_gcs}" \
        ARROW_S3="${check_s3}" \
        ARROW_VERSION="${VERSION}" \
        CHECK_WHEEL_CONTENT="${wheel_content}" \
        CHECK_VERSION="${check_version}" \
        INSTALL_PYARROW=OFF \
        "${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh" "${ARROW_SOURCE_DIR}"
    done
  done
}
| |
# Locate the Python wheels and run the platform-specific wheel tests from
# inside the directory that holds them. For a local checkout the wheels are
# taken from python/repaired_wheels; otherwise the wheels of the release
# candidate are downloaded and their signatures verified first.
# Globals: SOURCE_KIND, ARROW_SOURCE_DIR, ARROW_TMPDIR, SOURCE_DIR, VERSION,
#          RC_NUMBER, GITHUB_REPOSITORY, PYTHON (read)
test_wheels() {
  show_header "Downloading Python wheels"
  maybe_setup_conda python

  local wheels_dir=
  if [ "${SOURCE_KIND}" = "local" ]; then
    wheels_dir="${ARROW_SOURCE_DIR}/python/repaired_wheels"
  else
    # Paths are quoted so that a sandbox path with blanks works.
    local download_dir="${ARROW_TMPDIR}/binaries"
    mkdir -p "${download_dir}"

    local filter_regex
    if [ "$(uname)" == "Darwin" ]; then
      filter_regex='.*macosx.*'
    else
      filter_regex='.*manylinux.*'
    fi

    # PYTHON stays unquoted so that a value such as "python3 -u" keeps
    # working.
    ${PYTHON:-python3} \
      "${SOURCE_DIR}/download_rc_binaries.py" "${VERSION}" "${RC_NUMBER}" \
      --package_type python \
      --regex="${filter_regex}" \
      --dest="${download_dir}" \
      --repository="${GITHUB_REPOSITORY}" \
      --tag="apache-arrow-${VERSION}-rc${RC_NUMBER}"

    verify_dir_artifact_signatures "${download_dir}"

    wheels_dir="${download_dir}"
  fi

  pushd "${wheels_dir}"

  if [ "$(uname)" == "Darwin" ]; then
    test_macos_wheels
  else
    test_linux_wheels
  fi

  popd
}
| |
# By default test all functionalities.
# To deactivate one test, deactivate the test and all of its dependents
# To explicitly select one test, set TEST_DEFAULT=0 TEST_X=1
TEST_DEFAULT="${TEST_DEFAULT:-1}"

# Verification groups
TEST_SOURCE="${TEST_SOURCE:-${TEST_DEFAULT}}"
TEST_BINARIES="${TEST_BINARIES:-${TEST_DEFAULT}}"

# Binary verification tasks
TEST_APT="${TEST_APT:-${TEST_BINARIES}}"
TEST_BINARY="${TEST_BINARY:-${TEST_BINARIES}}"
TEST_WHEELS="${TEST_WHEELS:-${TEST_BINARIES}}"
TEST_YUM="${TEST_YUM:-${TEST_BINARIES}}"

# Source verification tasks
TEST_SOURCE_REPRODUCIBLE="${TEST_SOURCE_REPRODUCIBLE:-0}"
TEST_CPP="${TEST_CPP:-${TEST_SOURCE}}"
TEST_GLIB="${TEST_GLIB:-${TEST_SOURCE}}"
TEST_RUBY="${TEST_RUBY:-${TEST_SOURCE}}"
TEST_PYTHON="${TEST_PYTHON:-${TEST_SOURCE}}"
TEST_INTEGRATION="${TEST_INTEGRATION:-${TEST_SOURCE}}"

# For selective Integration testing, set TEST_DEFAULT=0 TEST_INTEGRATION_X=1 TEST_INTEGRATION_Y=1
TEST_INTEGRATION_CPP="${TEST_INTEGRATION_CPP:-${TEST_INTEGRATION}}"

# Automatically build/test if its activated by a dependent: a task's switch
# becomes positive as soon as one of the tasks that need it is enabled.
TEST_GLIB=$((TEST_GLIB + TEST_RUBY))
BUILD_CPP=$((TEST_CPP + TEST_GLIB + TEST_PYTHON + TEST_INTEGRATION_CPP))
TEST_INTEGRATION=$((TEST_INTEGRATION + TEST_INTEGRATION_CPP))

# Execute tests in a conda environment
USE_CONDA="${USE_CONDA:-0}"

# Build options for the C++ library. CUDA is probed only when the caller did
# not choose a value.
if [ -z "${ARROW_CUDA:-}" ]; then
  if detect_cuda; then
    ARROW_CUDA=ON
  else
    ARROW_CUDA=OFF
  fi
fi
ARROW_FLIGHT_SQL="${ARROW_FLIGHT_SQL:-ON}"
ARROW_FLIGHT="${ARROW_FLIGHT:-ON}"
ARROW_GANDIVA="${ARROW_GANDIVA:-ON}"
ARROW_GCS="${ARROW_GCS:-OFF}"
ARROW_S3="${ARROW_S3:-OFF}"
| |
# Main flow. TEST_SUCCESS stays "no" until every step has passed; the exit
# handler installed by setup_tempdir reads it.
TEST_SUCCESS="no"

setup_tempdir
ensure_source_directory
# Run source tests in a subshell so environment variables
# set for source testing aren't exposed to the binary tests.
( test_source_distribution )
test_binary_distribution

TEST_SUCCESS="yes"

printf '%s\n' "Release candidate ${VERSION}-RC${RC_NUMBER} looks good!"
exit 0