ARROW-13447: [Release] Verification script for arm64 and universal2 macOS wheels

- verification script for arm64 and universal2 macOS wheels
- verification tasks for arm64 macOS wheels
- verification tasks for source release on arm64 macOS

Closes #10799 from kszucs/apple-silicon-verification

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index f9fe8f9..82e0339 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -159,3 +159,7 @@
   echo "There are non-bundled shared library dependencies."
   exit 1
 fi
+
+# Move the verified wheels out of python/dist into python/repaired_wheels
+mkdir -p "${source_dir}/python/repaired_wheels"
+mv "${source_dir}"/python/dist/*.whl "${source_dir}/python/repaired_wheels/"
diff --git a/ci/scripts/python_wheel_manylinux_test.sh b/ci/scripts/python_wheel_manylinux_test.sh
deleted file mode 100755
index 2198774..0000000
--- a/ci/scripts/python_wheel_manylinux_test.sh
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-set -x
-set -o pipefail
-
-case $# in
-  1) KIND="$1"
-     case $KIND in
-       imports|unittests) ;;
-       *) echo "Invalid argument: '${KIND}', valid options are 'imports', 'unittests'"
-          exit 1
-          ;;
-     esac
-     ;;
-  *) echo "Usage: $0 imports|unittests"
-     exit 1
-     ;;
-esac
-
-export PYARROW_TEST_CYTHON=OFF
-export PYARROW_TEST_DATASET=ON
-export PYARROW_TEST_GANDIVA=OFF
-export PYARROW_TEST_HDFS=ON
-export PYARROW_TEST_ORC=ON
-export PYARROW_TEST_PANDAS=ON
-export PYARROW_TEST_PARQUET=ON
-export PYARROW_TEST_PLASMA=ON
-export PYARROW_TEST_S3=ON
-export PYARROW_TEST_TENSORFLOW=ON
-export PYARROW_TEST_FLIGHT=ON
-
-export ARROW_TEST_DATA=/arrow/testing/data
-export PARQUET_TEST_DATA=/arrow/submodules/parquet-testing/data
-
-# Install the built wheels
-pip install /arrow/python/repaired_wheels/*.whl
-
-if [ "${KIND}" == "imports" ]; then
-  # Test that the modules are importable
-  python -c "
-import pyarrow
-import pyarrow._hdfs
-import pyarrow._s3fs
-import pyarrow.csv
-import pyarrow.dataset
-import pyarrow.flight
-import pyarrow.fs
-import pyarrow.json
-import pyarrow.orc
-import pyarrow.parquet
-import pyarrow.plasma"
-elif [ "${KIND}" == "unittests" ]; then
-  # Execute unittest, test dependencies must be installed
-  pytest -r s --pyargs pyarrow
-fi
diff --git a/ci/scripts/python_wheel_macos_test.sh b/ci/scripts/python_wheel_unix_test.sh
similarity index 63%
rename from ci/scripts/python_wheel_macos_test.sh
rename to ci/scripts/python_wheel_unix_test.sh
index 5dabf6e..50d3ccb 100755
--- a/ci/scripts/python_wheel_macos_test.sh
+++ b/ci/scripts/python_wheel_unix_test.sh
@@ -17,12 +17,22 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set -ex
+set -e
+set -x
+set -o pipefail
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <arrow-src-dir>"
+  exit 1
+fi
 
 source_dir=${1}
 
-: ${ARROW_S3:=ON}
 : ${ARROW_FLIGHT:=ON}
+: ${ARROW_S3:=ON}
+: ${CHECK_IMPORTS:=ON}
+: ${CHECK_UNITTESTS:=ON}
+: ${INSTALL_PYARROW:=ON}
 
 export PYARROW_TEST_CYTHON=OFF
 export PYARROW_TEST_DATASET=ON
@@ -39,11 +49,14 @@
 export ARROW_TEST_DATA=${source_dir}/testing/data
 export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data
 
-# Install the built wheels
-pip install ${source_dir}/python/dist/*.whl
+if [ "${INSTALL_PYARROW}" == "ON" ]; then
+  # Install the built wheels (callers that already installed pyarrow pass INSTALL_PYARROW=OFF)
+  pip install "${source_dir}"/python/repaired_wheels/*.whl
+fi
 
-# Test that the modules are importable
-python -c "
+if [ "${CHECK_IMPORTS}" == "ON" ]; then
+  # Test that the modules are importable
+  python -c "
 import pyarrow
 import pyarrow._hdfs
 import pyarrow.csv
@@ -54,16 +67,18 @@
 import pyarrow.parquet
 import pyarrow.plasma
 "
-
-if [ "${PYARROW_TEST_S3}" == "ON" ]; then
-  python -c "import pyarrow._s3fs"
-fi
-if [ "${PYARROW_TEST_FLIGHT}" == "ON" ]; then
-  python -c "import pyarrow.flight"
+  if [ "${PYARROW_TEST_S3}" == "ON" ]; then
+    python -c "import pyarrow._s3fs"
+  fi
+  if [ "${PYARROW_TEST_FLIGHT}" == "ON" ]; then
+    python -c "import pyarrow.flight"
+  fi
 fi
 
-# Install testing dependencies
-pip install -r ${source_dir}/python/requirements-wheel-test.txt
-
-# Execute unittest
-pytest -r s --pyargs pyarrow
+if [ "${CHECK_UNITTESTS}" == "ON" ]; then
+  # Install testing dependencies
+  pip install -U -r "${source_dir}/python/requirements-wheel-test.txt"
+  # Create the library symlinks, then execute the unittests of the installed package
+  python -c 'import pyarrow; pyarrow.create_library_symlinks()'
+  pytest -r s --pyargs pyarrow
+fi
diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat
index 8352e58..586fd58 100755
--- a/ci/scripts/python_wheel_windows_test.bat
+++ b/ci/scripts/python_wheel_windows_test.bat
@@ -19,6 +19,7 @@
 
 set PYARROW_TEST_CYTHON=OFF
 set PYARROW_TEST_DATASET=ON
+set PYARROW_TEST_FLIGHT=ON
 set PYARROW_TEST_GANDIVA=OFF
 set PYARROW_TEST_HDFS=ON
 set PYARROW_TEST_ORC=OFF
@@ -27,7 +28,6 @@
 set PYARROW_TEST_PLASMA=OFF
 set PYARROW_TEST_S3=OFF
 set PYARROW_TEST_TENSORFLOW=ON
-set PYARROW_TEST_FLIGHT=ON
 
 set ARROW_TEST_DATA=C:\arrow\testing\data
 set PARQUET_TEST_DATA=C:\arrow\submodules\parquet-testing\data
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 5dc6d86..bf8e51f 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -572,29 +572,6 @@
   fi
 }
 
-check_python_imports() {
-   python << IMPORT_TESTS
-import platform
-
-import pyarrow
-import pyarrow.parquet
-import pyarrow.plasma
-import pyarrow.fs
-import pyarrow._hdfs
-import pyarrow.dataset
-import pyarrow.flight
-
-if platform.system() == "Darwin":
-    macos_version = tuple(map(int, platform.mac_ver()[0].split('.')))
-    check_s3fs = macos_version >= (10, 13)
-else:
-    check_s3fs = True
-
-if check_s3fs:
-    import pyarrow._s3fs
-IMPORT_TESTS
-}
-
 test_linux_wheels() {
   local py_arches="3.6m 3.7m 3.8 3.9"
   local manylinuxes="2010 2014"
@@ -608,12 +585,7 @@
     for ml_spec in ${manylinuxes}; do
       # check the mandatory and optional imports
       pip install python-rc/${VERSION}-rc${RC_NUMBER}/pyarrow-${VERSION}-cp${py_arch//[mu.]/}-cp${py_arch//./}-manylinux${ml_spec}_x86_64.whl
-      check_python_imports
-
-      # install test requirements and execute the tests
-      pip install -r ${ARROW_DIR}/python/requirements-test.txt
-      python -c 'import pyarrow; pyarrow.create_library_symlinks()'
-      pytest --pyargs pyarrow
+      INSTALL_PYARROW=OFF ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}
     done
 
     conda deactivate
@@ -622,7 +594,23 @@
 
 test_macos_wheels() {
   local py_arches="3.6m 3.7m 3.8 3.9"
+  local macos_version=$(sw_vers -productVersion)
+  local macos_short_version=${macos_version:0:5}
 
+  local check_s3=ON
+  local check_flight=ON
+
+  # macOS version <= 10.13 (NOTE(review): as written the test also matches 10.14 - confirm the boundary)
+  if [ "$(printf '%s\n' "${macos_short_version}" "10.14" | sort -V | head -n1)" == "${macos_short_version}" ]; then
+    local check_s3=OFF
+  fi
+  # apple silicon processor
+  if [ "$(uname -m)" = "arm64" ]; then
+    local py_arches="3.9"
+    local check_flight=OFF
+  fi
+
+  # verify arch-native wheels inside an arch-native conda environment
   for py_arch in ${py_arches}; do
     local env=_verify_wheel-${py_arch}
     conda create -yq -n ${env} python=${py_arch//m/}
@@ -631,15 +619,42 @@
 
     # check the mandatory and optional imports
     pip install --find-links python-rc/${VERSION}-rc${RC_NUMBER} pyarrow==${VERSION}
-    check_python_imports
-
-    # install test requirements and execute the tests
-    pip install -r ${ARROW_DIR}/python/requirements-test.txt
-    python -c 'import pyarrow; pyarrow.create_library_symlinks()'
-    pytest --pyargs pyarrow
+    INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \
+      ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}
 
     conda deactivate
   done
+
+  # verify the universal2 wheel as both arm64 and x86_64 using a universal2 python binary
+  # the interpreter should be installed from python.org (universal2 installer):
+  #   https://www.python.org/ftp/python/3.9.6/python-3.9.6-macos11.pkg
+  if [ "$(uname -m)" = "arm64" ]; then
+    for py_arch in ${py_arches}; do
+      local pyver=${py_arch//m/}
+      local python="/Library/Frameworks/Python.framework/Versions/${pyver}/bin/python${pyver}"
+
+      # create and activate a separate virtualenv for each architecture to test under
+      for arch in "arm64" "x86_64"; do
+        local venv="${ARROW_TMPDIR}/test-${arch}-virtualenv"
+        $python -m virtualenv $venv
+        source $venv/bin/activate
+        pip install -U pip
+
+        # install pyarrow's universal2 wheel into the virtualenv's site-packages
+        pip install \
+            --find-links python-rc/${VERSION}-rc${RC_NUMBER} \
+            --target $(python -c 'import site; print(site.getsitepackages()[0])') \
+            --platform macosx_11_0_universal2 \
+            --only-binary=:all: \
+            pyarrow==${VERSION}
+        # check the imports and execute the unittests under the selected architecture
+        INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \
+          arch -${arch} ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}
+
+        deactivate
+      done
+    done
+  fi
 }
 
 test_wheels() {
diff --git a/dev/tasks/python-wheels/github.osx.amd64.yml b/dev/tasks/python-wheels/github.osx.amd64.yml
index 863bd7f..8078abf 100644
--- a/dev/tasks/python-wheels/github.osx.amd64.yml
+++ b/dev/tasks/python-wheels/github.osx.amd64.yml
@@ -104,7 +104,7 @@
           $PYTHON -m virtualenv test-env
           source test-env/bin/activate
           pip install --upgrade pip wheel
-          arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
+          arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
 
-      {{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }}
-      {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
+      {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
+      {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
diff --git a/dev/tasks/python-wheels/github.osx.arm64.yml b/dev/tasks/python-wheels/github.osx.arm64.yml
index 90e15a6..1000510 100644
--- a/dev/tasks/python-wheels/github.osx.arm64.yml
+++ b/dev/tasks/python-wheels/github.osx.arm64.yml
@@ -122,7 +122,7 @@
           $PYTHON -m virtualenv test-arm64-env
           source test-arm64-env/bin/activate
           pip install --upgrade pip wheel
-          arch -arm64 arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
+          arch -arm64 arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
 
       {% if arch == "universal2" %}
       - name: Test Wheel on AMD64
@@ -131,7 +131,7 @@
           $PYTHON -m virtualenv test-amd64-env
           source test-amd64-env/bin/activate
           pip install --upgrade pip wheel
-          arch -x86_64 arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
+          arch -x86_64 arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
       {% endif %}
 
       - name: Upload artifacts
@@ -150,4 +150,4 @@
         env:
           CROSSBOW_GITHUB_TOKEN: {{ "${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}" }}
 
-      {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
+      {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 02a1428..a28547d 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -113,10 +113,10 @@
     - verify-rc-source-*
 
   verify-rc-source-macos:
-    - verify-rc-source-macos-*
+    - verify-rc-source-*-macos-*
 
   verify-rc-source-linux:
-    - verify-rc-source-linux-*
+    - verify-rc-source-*-linux-*
 
 {######################## Tasks to run regularly #############################}
 
@@ -734,9 +734,9 @@
   ########################### Release verification ############################
 
 {% for target in ["binary", "yum", "apt"] %}
-  verify-rc-binaries-{{ target }}:
+  verify-rc-binaries-{{ target }}-amd64:
     ci: github
-    template: verify-rc/github.linux.yml
+    template: verify-rc/github.linux.amd64.yml
     params:
       env:
         TEST_DEFAULT: 0
@@ -744,16 +744,8 @@
       artifact: "binaries"
 {% endfor %}
 
-{% for platform in ["linux", "macos"] %}
-
-  verify-rc-wheels-{{ platform }}:
-    ci: github
-    template: verify-rc/github.{{ platform }}.yml
-    params:
-      env:
-        TEST_DEFAULT: 0
-      artifact: "wheels"
-
+{% for platform, arch, runner in [("linux", "amd64", "ubuntu-20.04"),
+                                  ("macos", "amd64", "macos-10.15")] %}
   {% for target in ["cpp",
                     "csharp",
                     "go",
@@ -763,20 +755,83 @@
                     "python",
                     "ruby"] %}
 
-  verify-rc-source-{{ platform }}-{{ target }}:
+  verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}:
     ci: github
-    template: verify-rc/github.{{ platform }}.yml
+    template: verify-rc/github.{{ platform }}.{{ arch }}.yml
     params:
       env:
         INSTALL_NODE: 0
         TEST_DEFAULT: 0
         TEST_{{ target|upper }}: 1
       artifact: "source"
-
+      github_runner: {{ runner }}
   {% endfor %}
-
 {% endfor %}
 
+{% for platform, arch, runner in [("macos", "arm64", "self-hosted")] %}
+  {% for target in ["cpp",
+                    "csharp",
+                    "go",
+                    "integration",
+                    "js",
+                    "python",
+                    "ruby"] %}
+
+  verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}:
+    ci: github
+    template: verify-rc/github.{{ platform }}.{{ arch }}.yml
+    params:
+      env:
+        ARROW_FLIGHT: 0
+        ARROW_GANDIVA: 0
+        INSTALL_NODE: 0
+        TEST_DEFAULT: 0
+        TEST_INTEGRATION_JAVA: 0
+        TEST_{{ target|upper }}: 1
+      artifact: "source"
+      github_runner: {{ runner }}
+  {% endfor %}
+{% endfor %}
+
+  verify-rc-wheels-linux-amd64:
+    ci: github
+    template: verify-rc/github.linux.amd64.yml
+    params:
+      env:
+        TEST_DEFAULT: 0
+      artifact: "wheels"
+
+  verify-rc-wheels-macos-10.15-amd64:
+    ci: github
+    template: verify-rc/github.macos.amd64.yml
+    params:
+      github_runner: "macos-10.15"
+      env:
+        TEST_DEFAULT: 0
+      artifact: "wheels"
+
+  # GitHub-hosted macos-11 runners are in preview only, so this amd64 task uses the self-hosted runner with x86_64 emulation; switch once they are generally available:
+  #   https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
+  verify-rc-wheels-macos-11-amd64:
+    ci: github
+    template: verify-rc/github.macos.arm64.yml
+    params:
+      github_runner: "self-hosted"
+      arch_emulation: "x86_64"
+      env:
+        TEST_DEFAULT: 0
+      artifact: "wheels"
+
+  verify-rc-wheels-macos-11-arm64:
+    ci: github
+    template: verify-rc/github.macos.arm64.yml
+    params:
+      github_runner: "self-hosted"
+      arch_emulation: "arm64"
+      env:
+        TEST_DEFAULT: 0
+      artifact: "wheels"
+
   verify-rc-source-windows:
     ci: github
     template: verify-rc/github.win.yml
diff --git a/dev/tasks/verify-rc/github.linux.yml b/dev/tasks/verify-rc/github.linux.amd64.yml
similarity index 97%
rename from dev/tasks/verify-rc/github.linux.yml
rename to dev/tasks/verify-rc/github.linux.amd64.yml
index fdd9de6..8a4613a 100644
--- a/dev/tasks/verify-rc/github.linux.yml
+++ b/dev/tasks/verify-rc/github.linux.amd64.yml
@@ -22,7 +22,7 @@
 jobs:
   verify:
     name: "Verify release candidate Ubuntu {{ artifact }}"
-    runs-on: ubuntu-20.04
+    runs-on: {{ github_runner|default("ubuntu-20.04") }}
     {% if env is defined %}
     env:
     {% for key, value in env.items() %}
diff --git a/dev/tasks/verify-rc/github.macos.yml b/dev/tasks/verify-rc/github.macos.amd64.yml
similarity index 96%
rename from dev/tasks/verify-rc/github.macos.yml
rename to dev/tasks/verify-rc/github.macos.amd64.yml
index ab0c656..d39cda3 100644
--- a/dev/tasks/verify-rc/github.macos.yml
+++ b/dev/tasks/verify-rc/github.macos.amd64.yml
@@ -22,7 +22,7 @@
 jobs:
   verify:
     name: "Verify release candidate macOS {{ artifact }}"
-    runs-on: macos-latest
+    runs-on: {{ github_runner|default("macos-latest") }}
     {% if env is defined %}
     env:
     {% for key, value in env.items() %}
diff --git a/dev/tasks/verify-rc/github.macos.yml b/dev/tasks/verify-rc/github.macos.arm64.yml
similarity index 77%
copy from dev/tasks/verify-rc/github.macos.yml
copy to dev/tasks/verify-rc/github.macos.arm64.yml
index ab0c656..26139ed 100644
--- a/dev/tasks/verify-rc/github.macos.yml
+++ b/dev/tasks/verify-rc/github.macos.arm64.yml
@@ -22,7 +22,7 @@
 jobs:
   verify:
     name: "Verify release candidate macOS {{ artifact }}"
-    runs-on: macos-latest
+    runs-on: {{ github_runner }}
     {% if env is defined %}
     env:
     {% for key, value in env.items() %}
@@ -31,20 +31,18 @@
     {% endif %}
 
     steps:
+      - name: Cleanup
+        shell: bash
+        run: rm -rf arrow
+
       {{ macros.github_checkout_arrow()|indent }}
 
-      - name: Install System Dependencies
-        shell: bash
-        run: |
-          brew update
-          brew bundle --file=arrow/cpp/Brewfile
-          brew bundle --file=arrow/c_glib/Brewfile
-      - uses: actions/setup-node@v2-beta
-        with:
-          node-version: '14'
       - name: Run verification
         shell: bash
         run: |
-          arrow/dev/release/verify-release-candidate.sh \
+          export PATH="$(brew --prefix node@14)/bin:$PATH"
+          export PATH="$(brew --prefix ruby)/bin:$PATH"
+          export PKG_CONFIG_PATH="$(brew --prefix ruby)/lib/pkgconfig"
+          arch -{{ arch_emulation|default("arm64") }} arrow/dev/release/verify-release-candidate.sh \
             {{ artifact }} \
             {{ release|default("1.0.0") }} {{ rc|default("0") }}
diff --git a/docker-compose.yml b/docker-compose.yml
index 84ce4da..4290578 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -792,7 +792,10 @@
     shm_size: 2G
     volumes:
       - .:/arrow:delegated
-    command: /arrow/ci/scripts/python_wheel_manylinux_test.sh imports
+    environment:
+      CHECK_IMPORTS: "ON"
+      CHECK_UNITTESTS: "OFF"
+    command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow
 
   python-wheel-manylinux-test-unittests:
     image: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-test
@@ -807,7 +810,10 @@
     shm_size: 2G
     volumes:
       - .:/arrow:delegated
-    command: /arrow/ci/scripts/python_wheel_manylinux_test.sh unittests
+    environment:
+      CHECK_IMPORTS: "OFF"
+      CHECK_UNITTESTS: "ON"
+    command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow
 
   python-wheel-windows-vs2017:
     # The windows images must be built locally and pushed to a remote registry: