| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| |
name: Build and test PySpark on macOS

on:
  workflow_call:
    inputs:
      java:
        description: Java version to build and test with.
        required: false
        type: string
        # Quoted: the declared type is string, and a bare 17 is a YAML integer.
        default: '17'
      python:
        description: Python version to build and test with.
        required: false
        type: string
        # Quoted: bare version numbers are YAML floats (e.g. 3.10 would parse as 3.1).
        default: '3.12'
      branch:
        description: Branch to run the build against
        required: false
        type: string
        default: master
      hadoop:
        description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it.
        required: false
        type: string
        default: hadoop3
      os:
        description: OS to run this build.
        required: false
        type: string
        default: macos-15
      arch:
        description: The target architecture (x86, x64, arm64) of the Python interpreter.
        required: false
        type: string
        default: arm64
      envs:
        description: Additional environment variables to set when running the tests. Should be in JSON format.
        required: false
        type: string
        default: '{}'
jobs:
  build:
    name: "Build modules: ${{ matrix.modules }}"
    runs-on: ${{ inputs.os }}
    # TODO(SPARK-54466): https://github.com/actions/runner-images/issues/13341
    # timeout-minutes: 150
    strategy:
      fail-fast: false
      max-parallel: 20
      matrix:
        java:
          - ${{ inputs.java }}
        python:
          - ${{ inputs.python }}
        # Each entry is one job in the matrix; the listed modules run together.
        modules:
          - >-
            pyspark-sql, pyspark-resource, pyspark-testing
          - >-
            pyspark-core, pyspark-errors, pyspark-streaming
          - >-
            pyspark-mllib, pyspark-ml, pyspark-ml-connect
          - >-
            pyspark-structured-streaming, pyspark-structured-streaming-connect
          - >-
            pyspark-connect
          - >-
            pyspark-pandas
          - >-
            pyspark-pandas-slow
          - >-
            pyspark-pandas-connect
          - >-
            pyspark-pandas-slow-connect
    env:
      MODULES_TO_TEST: ${{ matrix.modules }}
      PYTHON_TO_TEST: python${{ inputs.python }}
      HADOOP_PROFILE: ${{ inputs.hadoop }}
      HIVE_PROFILE: hive2.3
      # GitHub Actions' default miniconda to use in pip packaging test.
      # NOTE(review): this is the Linux runner path; confirm it exists on the
      # macOS runners before relying on packaging tests there.
      CONDA_PREFIX: /usr/share/miniconda
      GITHUB_PREV_SHA: ${{ github.event.before }}
      SPARK_LOCAL_IP: localhost
      SKIP_UNIDOC: true
      SKIP_MIMA: true
      SKIP_PACKAGING: true
      METASPACE_SIZE: 1g
      BRANCH: ${{ inputs.branch }}
      PYSPARK_TEST_TIMEOUT: 450
| steps: |
| - name: Checkout Spark repository |
| uses: actions/checkout@v6 |
| # In order to fetch changed files |
| with: |
| fetch-depth: 0 |
| repository: apache/spark |
| ref: ${{ inputs.branch }} |
| - name: Sync the current branch with the latest in Apache Spark |
| if: github.repository != 'apache/spark' |
| run: | |
| echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV |
| git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} |
| git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD |
| git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty |
| # Cache local repositories. Note that GitHub Actions cache has a 10G limit. |
| - name: Cache SBT and Maven |
| # TODO(SPARK-54466): https://github.com/actions/runner-images/issues/13341 |
| if: ${{ runner.os != 'macOS' }} |
| uses: actions/cache@v5 |
| with: |
| path: | |
| build/apache-maven-* |
| build/*.jar |
| ~/.sbt |
| key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} |
| restore-keys: | |
| build- |
| - name: Cache Coursier local repository |
| # TODO(SPARK-54466): https://github.com/actions/runner-images/issues/13341 |
| if: ${{ runner.os != 'macOS' }} |
| uses: actions/cache@v5 |
| with: |
| path: ~/.cache/coursier |
| key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} |
| restore-keys: | |
| pyspark-coursier- |
| - name: Install Java ${{ matrix.java }} |
| uses: actions/setup-java@v5 |
| with: |
| distribution: zulu |
| java-version: ${{ matrix.java }} |
| - name: Install Python ${{matrix.python}} |
| uses: actions/setup-python@v6 |
| with: |
| python-version: ${{matrix.python}} |
| architecture: ${{ inputs.arch }} |
| - name: Install Python packages (Python ${{matrix.python}}) |
| run: | |
| python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2' |
| python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0' |
| python${{matrix.python}} -m pip install numpy 'pyarrow>=23.0.0' 'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \ |
| python${{matrix.python}} -m pip install 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'googleapis-common-protos==1.71.0' 'zstandard==0.25.0' 'graphviz==0.20.3' && \ |
| python${{matrix.python}} -m pip cache purge |
| - name: List Python packages |
| run: python${{matrix.python}} -m pip list |
| # Run the tests. |
| - name: Run tests |
| env: ${{ fromJSON(inputs.envs) }} |
| run: | |
| if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then |
| export SKIP_PACKAGING=false |
| echo "Python Packaging Tests Enabled!" |
| fi |
| ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --python-executables "$PYTHON_TO_TEST" |
| - name: Upload test results to report |
| env: ${{ fromJSON(inputs.envs) }} |
| if: always() |
| uses: actions/upload-artifact@v6 |
| with: |
| name: test-results-${{ inputs.os }}-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3-${{ env.PYTHON_TO_TEST }} |
| path: | |
| **/target/test-reports/*.xml |
| **/target/surefire-reports/*.xml |
| - name: Upload unit tests log files |
| env: ${{ fromJSON(inputs.envs) }} |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v6 |
| with: |
| name: unit-tests-log-${{ inputs.os }}-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3-${{ env.PYTHON_TO_TEST }} |
| path: "**/target/unit-tests.log" |