#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: Build and test using Maven

# Reusable workflow: it is only triggered through `workflow_call`.
# Every input is optional and falls back to the default listed here.
on:
  workflow_call:
    inputs:
      java:
        description: Java version to install and build with.
        required: false
        type: string
        # Quoted so that the default is a string, matching the declared type.
        default: '17'
      branch:
        description: Branch to run the build against
        required: false
        type: string
        default: master
      hadoop:
        description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it.
        required: false
        type: string
        default: hadoop3
      os:
        description: OS to run this build.
        required: false
        type: string
        default: ubuntu-22.04
      envs:
        description: Additional environment variables to set when running the tests. Should be in JSON format.
        required: false
        type: string
        default: '{}'
jobs:
  # Build: build Spark and run the tests for specified modules using maven
  build:
    name: "Build modules using Maven: ${{ matrix.modules }} ${{ matrix.comment }}"
    runs-on: ${{ inputs.os }}
    strategy:
      # Do not cancel the remaining matrix entries when one of them fails.
      fail-fast: false
      matrix:
        java:
          - ${{ inputs.java }}
        hadoop:
          - ${{ inputs.hadoop }}
        hive:
          - hive2.3
        # Each entry is a comma-separated list of modules. `#` stands in for `/`
        # and is translated back to a path in the "Run tests" step.
        modules:
          - >-
            core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch,common#utils,common#variant
          - >-
            graphx,streaming,hadoop-cloud
          - >-
            mllib-local,mllib
          - >-
            repl,sql#hive-thriftserver
          - >-
            connector#kafka-0-10,connector#kafka-0-10-sql,connector#kafka-0-10-token-provider,connector#spark-ganglia-lgpl,connector#protobuf,connector#avro,connector#kinesis-asl
          - >-
            sql#api,sql#catalyst,resource-managers#yarn,resource-managers#kubernetes#core
        # The tag filters and the job-name comment are empty for the entries above.
        # The `include` entries below set them in order to split the Hive and SQL
        # tests into the slow ones and the rest of them.
        included-tags: [ "" ]
        excluded-tags: [ "" ]
        comment: [ "" ]
        include:
          # Connect tests
          - modules: connect
            java: ${{ inputs.java }}
            hadoop: ${{ inputs.hadoop }}
            hive: hive2.3
            # TODO(SPARK-47110): Re-enable AmmoniteTest tests in Maven builds
            excluded-tags: org.apache.spark.tags.AmmoniteTest
            comment: ""
          # Hive tests
          - modules: sql#hive
            java: ${{ inputs.java }}
            hadoop: ${{ inputs.hadoop }}
            hive: hive2.3
            included-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- slow tests"
          - modules: sql#hive
            java: ${{ inputs.java }}
            hadoop: ${{ inputs.hadoop }}
            hive: hive2.3
            excluded-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- other tests"
          # SQL tests
          - modules: sql#core
            java: ${{ inputs.java }}
            hadoop: ${{ inputs.hadoop }}
            hive: hive2.3
            included-tags: org.apache.spark.tags.ExtendedSQLTest
            comment: "- extended tests"
          - modules: sql#core
            java: ${{ inputs.java }}
            hadoop: ${{ inputs.hadoop }}
            hive: hive2.3
            included-tags: org.apache.spark.tags.SlowSQLTest
            comment: "- slow tests"
          - modules: sql#core
            java: ${{ inputs.java }}
            hadoop: ${{ inputs.hadoop }}
            hive: hive2.3
            excluded-tags: org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.SlowSQLTest
            comment: "- other tests"
    # Job-level environment: exposes the matrix values to the shell steps below.
    env:
      MODULES_TO_TEST: ${{ matrix.modules }}
      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
      INCLUDED_TAGS: ${{ matrix.included-tags }}
      HADOOP_PROFILE: ${{ matrix.hadoop }}
      HIVE_PROFILE: ${{ matrix.hive }}
      SPARK_LOCAL_IP: localhost
      GITHUB_PREV_SHA: ${{ github.event.before }}
    steps:
      - name: Checkout Spark repository
        uses: actions/checkout@v4
        with:
          # Fetch the full history, in order to fetch changed files
          fetch-depth: 0
          repository: apache/spark
          ref: ${{ inputs.branch }}
      # Only for forks: squash-merge the fork's current ref on top of the
      # apache/spark checkout, remembering the upstream commit in APACHE_SPARK_REF.
      - name: Sync the current branch with the latest in Apache Spark
        if: github.repository != 'apache/spark'
        run: |
          echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> "$GITHUB_ENV"
          git fetch "https://github.com/$GITHUB_REPOSITORY.git" "${GITHUB_REF#refs/heads/}"
          git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
          git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
      # Cache local repositories. Note that GitHub Actions cache has a 10G limit.
      - name: Cache SBT and Maven
        uses: actions/cache@v4
        with:
          path: |
            build/apache-maven-*
            build/*.jar
            ~/.sbt
          key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
          restore-keys: |
            build-
      - name: Cache Maven local repository
        uses: actions/cache@v4
        with:
          path: ~/.m2/repository
          key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            java${{ matrix.java }}-maven-
      - name: Install Java ${{ matrix.java }}
        uses: actions/setup-java@v4
        with:
          distribution: zulu
          java-version: ${{ matrix.java }}
      - name: Install Python 3.11
        uses: actions/setup-python@v5
        # We should install a Python 3 version for SQL and Yarn because:
        # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
        # - Yarn has a Python specific test too, for example, YarnClusterSuite.
        # macos (14) already has its Python installed, see also SPARK-47096 and
        # https://github.com/actions/runner-images/blob/main/images/macos/macos-14-Readme.md
        #
        # NOTE(review): `contains(matrix.modules, 'connect')` is a substring match, so
        # here and in the next step it is also true for the `connector#...` matrix
        # entry - confirm that this is intended.
        if: contains(inputs.os, 'ubuntu') && (contains(matrix.modules, 'resource-managers#yarn') || (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect'))
        with:
          python-version: '3.11'
          architecture: x64
      - name: Install Python packages (Python 3.11)
        if: (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
        run: |
          python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
          python3.11 -m pip list
      # Run the tests.
      - name: Run tests
        # Caller-supplied extra environment variables (JSON object in `inputs.envs`).
        env: ${{ fromJSON(inputs.envs) }}
        run: |
          export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
          export MAVEN_CLI_OPTS="--no-transfer-progress"
          export JAVA_VERSION=${{ matrix.java }}
          export ENABLE_KINESIS_TESTS=0
          # Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10
          export TEST_MODULES=$(echo "$MODULES_TO_TEST" | sed -e "s%#%/%g")
          # Profiles shared by the initial install and by every non-Connect test run.
          # Expanded unquoted on purpose so that it splits into separate arguments.
          MVN_PROFILES="-Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl"
          # Drop an "-ea" marker from the Java version before passing it to Maven.
          MVN_JAVA_VERSION="-Djava.version=${JAVA_VERSION/-ea}"
          # Build and install all modules first, without running any tests.
          ./build/mvn $MAVEN_CLI_OPTS -DskipTests $MVN_PROFILES "$MVN_JAVA_VERSION" clean install
          # The branch order matters: the first matching condition decides the run.
          if [[ "$INCLUDED_TAGS" != "" ]]; then
            ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" $MVN_PROFILES "$MVN_JAVA_VERSION" -Dtest.include.tags="$INCLUDED_TAGS" test -fae
          elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
            ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" "$MVN_JAVA_VERSION" -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae
          elif [[ "$EXCLUDED_TAGS" != "" ]]; then
            ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" $MVN_PROFILES "$MVN_JAVA_VERSION" -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
          elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
            # To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead
            ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" $MVN_PROFILES "$MVN_JAVA_VERSION" clean install -fae
          else
            ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" $MVN_PROFILES "$MVN_JAVA_VERSION" test -fae
          fi
      # Remove the Spark artifacts that the install above put into the local Maven repository.
      - name: Clean up local Maven repository
        run: |
          rm -rf ~/.m2/repository/org/apache/spark
      - name: Upload test results to report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
          path: "**/target/test-reports/*.xml"
      - name: Upload unit tests log files
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
          path: "**/target/unit-tests.log"