| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
name: Spark SQL Tests

# Cancel any in-flight run for the same branch/PR (or SHA for pushes) so only
# the latest commit of each ref is tested.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true
| |
# NOTE: generic YAML 1.1 parsers read the bare `on` key as boolean `true`;
# GitHub's workflow loader handles it correctly, so no quoting is needed.
on:
  push:
    branches:
      - main
    paths-ignore:
      - "doc/**"
      - "docs/**"
      - "**.md"
      - "native/core/benches/**"
      - "native/spark-expr/benches/**"
      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
  pull_request:
    paths-ignore:
      - "doc/**"
      - "docs/**"
      - "**.md"
      - "native/core/benches/**"
      - "native/spark-expr/benches/**"
      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
  # manual trigger
  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
  workflow_dispatch:
    inputs:
      collect-fallback-logs:
        description: 'Whether to collect Comet fallback reasons from spark sql unit test logs'
        required: false
        # A `type: boolean` input takes a real boolean default, not the string
        # 'false'. Downstream steps still see the string form ('true'/'false')
        # through the github.event.inputs context, so their comparisons hold.
        default: false
        type: boolean
| |
# Workflow-wide environment so the Rust toolchain version is pinned in one place.
env:
  RUST_VERSION: stable
| |
jobs:

  # Build native library once and share with all test jobs
  build-native:
    name: Build Native Library
    runs-on: ubuntu-24.04
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v6

      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: ${{ env.RUST_VERSION }}
          jdk-version: 17

      # Restore-only here; the matching save step at the end of this job runs
      # on `main` only, so PR runs reuse the shared cache without churning it.
      - name: Restore Cargo cache
        uses: actions/cache/restore@v5
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            native/target
          key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}-${{ hashFiles('native/**/*.rs') }}
          restore-keys: |
            ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}-

      - name: Build native library (CI profile)
        run: |
          cd native
          cargo build --profile ci
        env:
          RUSTFLAGS: "-Ctarget-cpu=x86-64-v3"

      # `--profile ci` places output under native/target/ci/; the artifact is
      # consumed by every spark-sql-test matrix job below.
      - name: Upload native library
        uses: actions/upload-artifact@v6
        with:
          name: native-lib-linux
          path: native/target/ci/libcomet.so
          retention-days: 1

      - name: Save Cargo cache
        uses: actions/cache/save@v5
        if: github.ref == 'refs/heads/main'
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            native/target
          key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}-${{ hashFiles('native/**/*.rs') }}
| |
| spark-sql-test: |
| needs: build-native |
| strategy: |
| matrix: |
| os: [ubuntu-24.04] |
| module: |
| - {name: "catalyst", args1: "catalyst/test", args2: ""} |
| - {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest} |
| - {name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"} |
| - {name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"} |
| - {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"} |
| - {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"} |
| - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"} |
| # Test combinations: |
| # - auto scan: all Spark versions (3.4, 3.5, 4.0) |
| # - native_iceberg_compat: Spark 3.5 only |
| config: |
| - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto'} |
| - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto'} |
| - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'native_datafusion'} |
| - {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'} |
| # Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946 |
| exclude: |
| - config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'} |
| module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"} |
| fail-fast: false |
| name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }} |
| runs-on: ${{ matrix.os }} |
| container: |
| image: amd64/rust |
| steps: |
| - uses: actions/checkout@v6 |
| - name: Setup Rust & Java toolchain |
| uses: ./.github/actions/setup-builder |
| with: |
| rust-version: ${{env.RUST_VERSION}} |
| jdk-version: ${{ matrix.config.java }} |
| - name: Download native library |
| uses: actions/download-artifact@v7 |
| with: |
| name: native-lib-linux |
| path: native/target/release/ |
| - name: Setup Spark |
| uses: ./.github/actions/setup-spark-builder |
| with: |
| spark-version: ${{ matrix.config.spark-full }} |
| spark-short-version: ${{ matrix.config.spark-short }} |
| skip-native-build: true |
| - name: Run Spark tests |
| run: | |
| cd apache-spark |
| rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups |
| NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=${{ matrix.config.scan-impl }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \ |
| build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}" |
| if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then |
| find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log |
| fi |
| env: |
| LC_ALL: "C.UTF-8" |
| - name: Upload fallback log |
| if: ${{ github.event.inputs.collect-fallback-logs == 'true' }} |
| uses: actions/upload-artifact@v6 |
| with: |
| name: fallback-log-spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }} |
| path: "**/fallback.log" |
| |
  # Aggregate the per-matrix-job fallback logs into one deduplicated artifact.
  merge-fallback-logs:
    if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
    name: merge-fallback-logs
    needs: [spark-sql-test]
    runs-on: ubuntu-24.04
    steps:
      # No `name:` filter: downloads every artifact from this run; the merge
      # step below selects only the fallback.log files.
      - name: Download fallback log artifacts
        uses: actions/download-artifact@v7
        with:
          path: fallback-logs/
      - name: Merge fallback logs
        run: |
          find ./fallback-logs/ -type f -name "fallback.log" -print0 | xargs -0 cat | sort -u > all_fallback.log
      - name: Upload merged fallback log
        uses: actions/upload-artifact@v6
        with:
          name: all-fallback-log
          path: all_fallback.log