| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
# Workflow name shown in the GitHub Actions UI.
name: Velox Backend (x86)

# Run only for pull requests that touch this workflow, the build tooling,
# or any module the Velox backend depends on; unrelated PRs skip this
# expensive native build + test pipeline.
on:
  pull_request:
    paths:
      - '.github/workflows/velox_backend_x86.yml'
      - 'pom.xml'
      - 'backends-velox/**'
      - 'gluten-uniffle/**'
      - 'gluten-celeborn/**'
      - 'gluten-ras/**'
      - 'gluten-core/**'
      - 'gluten-substrait/**'
      - 'gluten-arrow/**'
      - 'gluten-delta/**'
      - 'gluten-iceberg/**'
      - 'gluten-hudi/**'
      - 'gluten-paimon/**'
      - 'gluten-ut/**'
      - 'package/**'
      - 'shims/**'
      - 'tools/gluten-it/**'
      - 'ep/build-velox/**'
      - 'cpp/**'
      - 'dev/**'
| |
# Environment shared by every job in this workflow.
env:
  # Allow legacy Node-based actions inside old containers (e.g. CentOS).
  # Quoted: GitHub exports env values as strings, and quoting keeps generic
  # YAML 1.1 tooling from re-typing boolean-looking scalars.
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: "true"
  MVN_CMD: 'mvn -ntp'
  WGET_CMD: 'wget -nv'
  SETUP: 'source .github/workflows/util/setup_helper.sh'
  CCACHE_DIR: "${{ github.workspace }}/.ccache"
  # spark.sql.ansi.enabled defaults to false.
  SPARK_ANSI_SQL_MODE: "false"
| |
# Cancel superseded in-flight runs for the same PR branch (or commit, for
# non-PR events) so new pushes do not queue behind stale builds.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true
| |
| jobs: |
  # Builds the Gluten native libraries inside the CentOS 7 vcpkg container
  # (keeps binaries compatible with old glibc) and uploads them, together
  # with the locally-built Arrow jars, for all downstream test jobs.
  build-native-lib-centos-7:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      # Warm the ccache from the most recent build of this branch/default key.
      - name: Get Ccache
        uses: actions/cache/restore@v4
        with:
          path: '${{ env.CCACHE_DIR }}'
          key: ccache-centos7-release-default-${{github.sha}}
          restore-keys: |
            ccache-centos7-release-default
      - name: Build Gluten native libraries
        run: |
          docker pull apache/gluten:vcpkg-centos-7
          docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:vcpkg-centos-7 bash -c "
            set -e
            yum install tzdata -y
            df -a
            cd /work
            export CCACHE_DIR=/work/.ccache
            mkdir -p /work/.ccache
            bash dev/ci-velox-buildstatic-centos-7.sh
            ccache -s
            mkdir -p /work/.m2/repository/org/apache/arrow/
            cp -r /root/.m2/repository/org/apache/arrow/* /work/.m2/repository/org/apache/arrow/
          "

      # Save the refreshed ccache under a sha-specific key for future runs.
      - name: "Save ccache"
        uses: actions/cache/save@v4
        id: ccache
        with:
          path: '${{ env.CCACHE_DIR }}'
          key: ccache-centos7-release-default-${{github.sha}}
      - uses: actions/upload-artifact@v4
        with:
          name: velox-native-lib-centos-7-${{github.sha}}
          path: ./cpp/build/releases/
          if-no-files-found: error
      - uses: actions/upload-artifact@v4
        with:
          name: arrow-jars-centos-7-${{github.sha}}
          path: .m2/repository/org/apache/arrow/
          if-no-files-found: error
| tpc-test-ubuntu: |
| needs: build-native-lib-centos-7 |
| strategy: |
| fail-fast: false |
| matrix: |
| os: [ "ubuntu:20.04", "ubuntu:22.04" ] |
| spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5", "spark-4.0" ] |
| java: [ "java-8", "java-11", "java-17" ] |
| # Spark supports JDK17 since 3.3. |
| exclude: |
| - spark: spark-3.2 |
| java: java-17 |
| - spark: spark-3.4 |
| java: java-17 |
| - spark: spark-3.5 |
| java: java-17 |
| - spark: spark-3.2 |
| java: java-11 |
| - spark: spark-3.3 |
| java: java-11 |
| - spark: spark-3.4 |
| java: java-11 |
| - os: ubuntu:22.04 |
| java: java-17 |
| - os: ubuntu:22.04 |
| java: java-11 |
| - spark: spark-4.0 |
| java: java-8 |
| - spark: spark-4.0 |
| java: java-11 |
| |
| runs-on: ubuntu-22.04 |
| container: ${{ matrix.os }} |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Native Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases/ |
| - name: Download All Arrow Jar Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Setup tzdata |
| run: | |
| #sed -i 's|http://archive|http://us.archive|g' /etc/apt/sources.list |
| if [ "${{ matrix.os }}" = "ubuntu:22.04" ]; then |
| apt-get update |
| TZ="Etc/GMT" DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata |
| fi |
| - name: Setup java and maven |
| run: | |
| if [ "${{ matrix.java }}" = "java-17" ]; then |
| apt-get update && apt-get install -y openjdk-17-jdk maven |
| apt remove openjdk-11* -y |
| elif [ "${{ matrix.java }}" = "java-11" ]; then |
| apt-get update && apt-get install -y openjdk-11-jdk maven |
| else |
| apt-get update && apt-get install -y openjdk-8-jdk maven |
| apt remove openjdk-11* -y |
| fi |
| ls -l /root/.m2/repository/org/apache/arrow/arrow-dataset/15.0.0-gluten/ |
| - name: Build and run TPC-H / TPC-DS |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64 |
| echo "JAVA_HOME: $JAVA_HOME" |
| if [ "${{ matrix.spark }}" = "spark-4.0" ]; then |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pscala-2.13 -Pbackends-velox -DskipTests |
| else |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests |
| fi |
| cd $GITHUB_WORKSPACE/tools/gluten-it |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 |
| |
| tpc-test-centos8: |
| needs: build-native-lib-centos-7 |
| strategy: |
| fail-fast: false |
| matrix: |
| os: [ "centos:8" ] |
| spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5", "spark-4.0" ] |
| java: [ "java-8", "java-11", "java-17" ] |
| # Spark supports JDK17 since 3.3. |
| exclude: |
| - spark: spark-3.2 |
| java: java-17 |
| - spark: spark-3.4 |
| java: java-17 |
| - spark: spark-3.5 |
| java: java-17 |
| - spark: spark-3.2 |
| java: java-11 |
| - spark: spark-3.3 |
| java: java-11 |
| - spark: spark-3.4 |
| java: java-11 |
| - spark: spark-4.0 |
| java: java-8 |
| - spark: spark-4.0 |
| java: java-11 |
| |
| runs-on: ubuntu-22.04 |
| container: ${{ matrix.os }} |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Native Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases/ |
| - name: Download All Arrow Jar Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Update mirror list |
| run: | |
| if [ "${{ matrix.os }}" = "centos:7" ] || [ "${{ matrix.os }}" = "centos:8" ]; then |
| sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true |
| sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true |
| fi |
| - name: Setup java and maven |
| run: | |
| if [ "${{ matrix.java }}" = "java-17" ]; then |
| yum update -y && yum install -y java-17-openjdk-devel wget |
| elif [ "${{ matrix.java }}" = "java-11" ]; then |
| yum update -y && yum install -y java-11-openjdk-devel wget |
| else |
| yum update -y && yum install -y java-1.8.0-openjdk-devel wget |
| fi |
| $SETUP install_maven |
| - name: Set environment variables |
| run: | |
| if [ "${{ matrix.java }}" = "java-17" ]; then |
| echo "JAVA_HOME=/usr/lib/jvm/java-17-openjdk" >> $GITHUB_ENV |
| elif [ "${{ matrix.java }}" = "java-11" ]; then |
| echo "JAVA_HOME=/usr/lib/jvm/java-11-openjdk" >> $GITHUB_ENV |
| else |
| echo "JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >> $GITHUB_ENV |
| fi |
| - name: Build gluten-it |
| run: | |
| echo "JAVA_HOME: $JAVA_HOME" |
| cd $GITHUB_WORKSPACE/ |
| if [ "${{ matrix.spark }}" = "spark-4.0" ]; then |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pscala-2.13 -Pbackends-velox -DskipTests |
| else |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests |
| fi |
| cd $GITHUB_WORKSPACE/tools/gluten-it |
| $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} |
| - name: Run TPC-H / TPC-DS |
| run: | |
| echo "JAVA_HOME: $JAVA_HOME" |
| cd $GITHUB_WORKSPACE/tools/gluten-it |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 |
| - name: Run TPC-H / TPC-DS with RAS |
| run: | |
| echo "JAVA_HOME: $JAVA_HOME" |
| cd $GITHUB_WORKSPACE/tools/gluten-it |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ |
| --extra-conf=spark.gluten.ras.enabled=true \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ |
| --extra-conf=spark.gluten.ras.enabled=true |
| |
| tpc-test-centos7: |
| needs: build-native-lib-centos-7 |
| strategy: |
| fail-fast: false |
| matrix: |
| spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5" ] |
| java: [ "java-8" ] |
| runs-on: ubuntu-22.04 |
| steps: |
| - uses: actions/checkout@v4 |
| - name: Download All Native Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases/ |
| - name: Download All Arrow Jar Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: .m2/repository/org/apache/arrow/ |
| - name: Build and run TPCH/DS tests |
| run: | |
| docker pull centos:7 |
| docker run -v $GITHUB_WORKSPACE:/work -v /$GITHUB_WORKSPACE/.m2:/root/.m2/ -w /work \ |
| -e matrix.java=${{ matrix.java }} -e matrix.spark=${{ matrix.spark }} \ |
| centos:7 \ |
| bash -c " |
| sed -i -e 's|mirrorlist=|#mirrorlist=|g' /etc/yum.repos.d/CentOS-* || true |
| sed -i -e 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* || true |
| |
| # Setup java and maven |
| yum update -y && yum install -y java-1.8.0-openjdk-devel wget tzdata python3-pip |
| $SETUP install_maven |
| # Set environment variables |
| export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk |
| |
| # Build gluten-it |
| mvn -ntp clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests |
| cd /work/tools/gluten-it |
| mvn -ntp clean install -P${{ matrix.spark }} -P${{ matrix.java }} |
| |
| # Run TPC-H / TPC-DS |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 |
| |
| # Run TPC-H / TPC-DS with RAS |
| cd /work/tools/gluten-it |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ |
| --extra-conf=spark.gluten.ras.enabled=true \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ |
| --extra-conf=spark.gluten.ras.enabled=true |
| " |
| |
| tpc-test-ubuntu-oom: |
| needs: build-native-lib-centos-7 |
| strategy: |
| fail-fast: false |
| matrix: |
| spark: [ "spark-3.2" ] |
| runs-on: ubuntu-22.04 |
| steps: |
| - name: Maximize build disk space |
| shell: bash |
| run: | |
| df -h |
| set -euo pipefail |
| echo "Removing unwanted software... " |
| sudo rm -rf /usr/share/dotnet |
| sudo rm -rf /usr/local/lib/android |
| sudo rm -rf /opt/ghc |
| sudo rm -rf /opt/hostedtoolcache/CodeQL |
| sudo docker image prune --all --force > /dev/null |
| df -h |
| - uses: actions/checkout@v2 |
| - name: Download All Native Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases/ |
| - name: Download All Arrow Jar Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /home/runner/.m2/repository/org/apache/arrow/ |
| - name: Setup java and maven |
| run: | |
| sudo apt-get update |
| sudo apt-get install -y openjdk-8-jdk maven |
| - name: Set environment variables |
| run: | |
| echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV |
| - name: Build for Spark ${{ matrix.spark }} |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests |
| cd $GITHUB_WORKSPACE/tools/gluten-it |
| $MVN_CMD clean install -P${{ matrix.spark }} |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12 |
| - name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off |
| run: | |
| cd tools/gluten-it \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ |
| --data-gen=skip -m=OffHeapExecutionMemory \ |
| -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ |
| -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \ |
| -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \ |
| -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ |
| -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 \ |
| --excluded-dims=OFFHEAP_SIZE:4g |
| - name: TPC-DS SF30.0 Parquet local spark3.2 Q67 low memory, memory isolation on |
| run: | |
| cd tools/gluten-it \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ |
| --data-gen=skip -m=OffHeapExecutionMemory \ |
| -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ |
| -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \ |
| -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \ |
| -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ |
| -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 |
| - name: TPC-DS SF30.0 Parquet local spark3.2 Q95 low memory, memory isolation on |
| run: | |
| cd tools/gluten-it \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ |
| --data-gen=skip -m=OffHeapExecutionMemory \ |
| -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ |
| -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \ |
| -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \ |
| -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ |
| -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 |
| - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory |
| run: | |
| cd tools/gluten-it \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ |
| --data-gen=skip -m=OffHeapExecutionMemory \ |
| -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ |
| -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ |
| -d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ |
| -d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ |
| -d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 |
| - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory, memory isolation on |
| run: | |
| cd tools/gluten-it \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ |
| --data-gen=skip -m=OffHeapExecutionMemory \ |
| -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ |
| -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ |
| -d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ |
| -d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ |
| -d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 |
| - name: TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory |
| run: | |
| cd tools/gluten-it \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ |
| --data-gen=skip -m=OffHeapExecutionMemory \ |
| --extra-conf=spark.gluten.sql.columnar.backend.velox.IOThreads=0 \ |
| -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ |
| -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ |
| -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ |
| -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g \ |
| -d=IO_THREADS:12,spark.gluten.sql.columnar.backend.velox.IOThreads=12 \ |
| -d=IO_THREADS:0,spark.gluten.sql.columnar.backend.velox.IOThreads=0 |
| |
| tpc-test-ubuntu-randomkill: |
| needs: build-native-lib-centos-7 |
| strategy: |
| fail-fast: false |
| matrix: |
| spark: [ "spark-3.2" ] |
| runs-on: ubuntu-22.04 |
| steps: |
| - name: Maximize build disk space |
| shell: bash |
| run: | |
| df -h |
| set -euo pipefail |
| echo "Removing unwanted software... " |
| sudo rm -rf /usr/share/dotnet |
| sudo rm -rf /usr/local/lib/android |
| sudo rm -rf /opt/ghc |
| sudo rm -rf /opt/hostedtoolcache/CodeQL |
| sudo docker image prune --all --force > /dev/null |
| df -h |
| - uses: actions/checkout@v2 |
| - name: Download All Native Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases/ |
| - name: Download All Arrow Jar Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /home/runner/.m2/repository/org/apache/arrow/ |
| - name: Setup java and maven |
| run: | |
| sudo apt-get update |
| sudo apt-get install -y openjdk-8-jdk maven |
| - name: Set environment variables |
| run: | |
| echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV |
| - name: Build for Spark ${{ matrix.spark }} |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests |
| cd $GITHUB_WORKSPACE/tools/gluten-it |
| $MVN_CMD clean install -P${{ matrix.spark }} |
| GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12 |
| - name: TPC-DS SF30.0 Parquet local spark3.2 random kill tasks |
| run: | |
| cd tools/gluten-it \ |
| && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries \ |
| --local --preset=velox --benchmark-type=ds --error-on-memleak -s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \ |
| --data-gen=skip --random-kill-tasks --no-session-reuse |
| |
| tpc-test-centos8-uniffle: |
| needs: build-native-lib-centos-7 |
| strategy: |
| fail-fast: false |
| matrix: |
| spark: [ "spark-3.2" ] |
| uniffle: [ "0.9.2" ] |
| hadoop: [ "2.8.5" ] |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Native Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases/ |
| - name: Download All Arrow Jar Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Install Uniffle ${{ matrix.uniffle }} |
| run: | |
| export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk |
| if [ ! -e "/opt/apache-uniffle-${{ matrix.uniffle }}-incubating-bin.tar.gz" ]; then |
| ${WGET_CMD} https://archive.apache.org/dist/incubator/uniffle/${{ matrix.uniffle }}/apache-uniffle-${{ matrix.uniffle }}-incubating-bin.tar.gz -P /opt |
| fi |
| if [ ! -e "/opt/hadoop-${{ matrix.hadoop }}.tar.gz" ]; then |
| ${WGET_CMD} https://archive.apache.org/dist/hadoop/common/hadoop-${{ matrix.hadoop }}/hadoop-${{ matrix.hadoop }}.tar.gz -P /opt |
| fi |
| cd /opt && \ |
| mkdir /opt/uniffle && tar xzf apache-uniffle-${{ matrix.uniffle }}-incubating-bin.tar.gz -C /opt/uniffle --strip-components=1 && \ |
| tar xzf hadoop-${{ matrix.hadoop }}.tar.gz -C /opt/ && \ |
| cd /opt/uniffle && mkdir shuffle_data && \ |
| bash -c "echo -e 'XMX_SIZE=16g\nHADOOP_HOME=/opt/hadoop-${{ matrix.hadoop }}' > ./bin/rss-env.sh" && \ |
| bash -c "echo -e 'rss.coordinator.shuffle.nodes.max 1\nrss.rpc.server.port 19999' > ./conf/coordinator.conf" && \ |
| bash -c "echo -e 'rss.server.app.expired.withoutHeartbeat 7200000\nrss.server.heartbeat.delay 3000\nrss.rpc.server.port 19997\nrss.rpc.server.type GRPC_NETTY\nrss.jetty.http.port 19996\nrss.server.netty.port 19995\nrss.storage.basePath /opt/uniffle/shuffle_data\nrss.storage.type MEMORY_LOCALFILE\nrss.coordinator.quorum localhost:19999\nrss.server.flush.thread.alive 10\nrss.server.single.buffer.flush.threshold 64m' > ./conf/server.conf" && \ |
| bash ./bin/start-coordinator.sh && bash ./bin/start-shuffle-server.sh |
| - name: Build for Spark ${{ matrix.spark }} |
| run: | |
| cd $GITHUB_WORKSPACE/ && \ |
| $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -Puniffle -DskipTests |
| - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with uniffle-${{ matrix.uniffle }} |
| run: | |
| export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk && \ |
| cd $GITHUB_WORKSPACE/tools/gluten-it && \ |
| $MVN_CMD clean install -P${{ matrix.spark }} -Puniffle && \ |
| GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox-with-uniffle --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 |
| |
| tpc-test-ubuntu-2204-celeborn: |
| needs: build-native-lib-centos-7 |
| strategy: |
| fail-fast: false |
| matrix: |
| spark: [ "spark-3.2" ] |
| celeborn: [ "celeborn-0.6.1", "celeborn-0.5.4", "celeborn-0.4.3"] |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Native Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases/ |
| - name: Download All Arrow Jar Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Build for Spark ${{ matrix.spark }} |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -Pceleborn -DskipTests |
| - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with ${{ matrix.celeborn }} |
| run: | |
| EXTRA_PROFILE="" |
| if [ "${{ matrix.celeborn }}" = "celeborn-0.4.3" ]; then |
| EXTRA_PROFILE="-Pceleborn-0.4" |
| elif [ "${{ matrix.celeborn }}" = "celeborn-0.5.4" ]; then |
| EXTRA_PROFILE="-Pceleborn-0.5" |
| elif [ "${{ matrix.celeborn }}" = "celeborn-0.6.1" ]; then |
| EXTRA_PROFILE="-Pceleborn-0.6" |
| fi |
| echo "EXTRA_PROFILE: ${EXTRA_PROFILE}" |
| if [ ! -e "/opt/apache-${{ matrix.celeborn }}-bin.tgz" ]; then |
| echo "WARNING: please pre-install your required package in docker image since the downloading is throttled by this site." |
| wget -nv https://archive.apache.org/dist/celeborn/${{ matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz -P /opt/ |
| fi |
| cd /opt && mkdir -p celeborn && \ |
| tar xzf apache-${{ matrix.celeborn }}-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \ |
| mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \ |
| bash -c "echo -e 'CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g' > ./conf/celeborn-env.sh" && \ |
| bash -c "echo -e 'celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64' > ./conf/celeborn-defaults.conf" && \ |
| bash ./sbin/start-master.sh && bash ./sbin/start-worker.sh && \ |
| cd $GITHUB_WORKSPACE/tools/gluten-it && $MVN_CMD clean install -Pspark-3.2 -Pceleborn ${EXTRA_PROFILE} && \ |
| GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox-with-celeborn --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=8 --iterations=1 && \ |
| GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox-with-celeborn --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=8 --iterations=1 && \ |
| GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox-with-celeborn --extra-conf=spark.celeborn.client.spark.shuffle.writer=sort \ |
| --extra-conf=spark.celeborn.push.sortMemory.threshold=8m --benchmark-type=ds --error-on-memleak \ |
| --off-heap-size=10g -s=1.0 --threads=8 --iterations=1 |
| GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ |
| --local --preset=velox-with-celeborn --extra-conf=spark.celeborn.client.spark.shuffle.writer=sort \ |
| --extra-conf=spark.gluten.sql.columnar.shuffle.celeborn.useRssSort=false \ |
| --extra-conf=spark.celeborn.push.sortMemory.threshold=8m --benchmark-type=ds --error-on-memleak \ |
| --off-heap-size=10g -s=1.0 --threads=8 --iterations=1 |
| |
| spark-test-spark32: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Prepare |
| run: | |
| dnf module -y install python39 && \ |
| alternatives --set python3 /usr/bin/python3.9 && \ |
| pip3 install setuptools==77.0.3 && \ |
| pip3 install pyspark==3.2.2 cython && \ |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 |
| - name: Build and run unit test for Spark 3.2.2 (other tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| export SPARK_SCALA_VERSION=2.12 |
| $MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Piceberg \ |
| -Pdelta -Phudi -DargLine="-Dspark.test.home=/opt/shims/spark32/spark_home/" \ |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest |
| - name: Upload test report |
| if: always() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| - name: Upload golden files |
| if: failure() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-golden-files |
| path: /tmp/tpch-approved-plan/** |
| |
| spark-test-spark32-slow: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Build and run unit test for Spark 3.2.2 (slow tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| $MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Piceberg -Pdelta -Phudi \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark32/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest |
| - name: Upload test report |
| if: always() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| |
| spark-test-spark33: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Prepare |
| run: | |
| dnf module -y install python39 && \ |
| alternatives --set python3 /usr/bin/python3.9 && \ |
| pip3 install setuptools==77.0.3 && \ |
| pip3 install pyspark==3.3.1 cython && \ |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 |
| - name: Build and Run unit test for Spark 3.3.1 (other tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| export SPARK_SCALA_VERSION=2.12 |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD clean test -Pspark-3.3 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \ |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest |
| - name: Upload test report |
| if: always() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| - name: Upload golden files |
| if: failure() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-golden-files |
| path: /tmp/tpch-approved-plan/** |
| |
| spark-test-spark33-slow: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| |
| - name: Build and Run unit test for Spark 3.3.1 (slow tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD clean test -Pspark-3.3 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \ |
| -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest |
| - name: Upload test report |
| if: always() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| |
| spark-test-spark34: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Prepare spark.test.home for Spark 3.4.4 (other tests) |
| run: | |
| dnf module -y install python39 && \ |
| alternatives --set python3 /usr/bin/python3.9 && \ |
| pip3 install setuptools==77.0.3 && \ |
| pip3 install pyspark==3.4.4 cython && \ |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 |
| - name: Build and Run unit test for Spark 3.4.4 (other tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| export SPARK_SCALA_VERSION=2.12 |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| export SPARK_HOME=/opt/shims/spark34/spark_home/ |
| ls -l $SPARK_HOME |
| $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \ |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest \ |
| -DargLine="-Dspark.test.home=$SPARK_HOME" |
| - name: Upload test report |
| if: always() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| - name: Upload golden files |
| if: failure() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-golden-files |
| path: /tmp/tpch-approved-plan/** |
| |
| spark-test-spark34-slow: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Build and Run unit test for Spark 3.4.4 (slow tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| export SPARK_HOME=/opt/shims/spark34/spark_home/ |
| ls -l $SPARK_HOME |
| $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \ |
| -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \ |
| -DargLine="-Dspark.test.home=$SPARK_HOME" |
| - name: Upload test report |
| if: always() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| |
| spark-test-spark35: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Prepare |
| run: | |
| dnf module -y install python39 && \ |
| alternatives --set python3 /usr/bin/python3.9 && \ |
| pip3 install setuptools==77.0.3 && \ |
| pip3 install pyspark==3.5.5 cython && \ |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 |
| - name: Build and Run unit test for Spark 3.5.5 (other tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| export SPARK_SCALA_VERSION=2.12 |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \ |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest |
| - name: Upload test report |
| if: always() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| - name: Upload golden files |
| if: failure() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-golden-files |
| path: /tmp/tpch-approved-plan/** |
| |
| spark-test-spark35-scala213: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Prepare |
| run: | |
| dnf module -y install python39 && \ |
| alternatives --set python3 /usr/bin/python3.9 && \ |
| pip3 install setuptools==77.0.3 && \ |
| pip3 install pyspark==3.5.5 cython && \ |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 |
| - name: Build and Run unit test for Spark 3.5.5 with scala-2.13 (other tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| export SPARK_SCALA_VERSION=2.13 |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Piceberg \ |
| -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark35-scala-2.13/spark_home/" \ |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest |
| - name: Upload test report |
| if: always() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| |
| spark-test-spark35-slow: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Build and Run unit test for Spark 3.5.5 (slow tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \ |
| -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest |
| - name: Upload test report |
| if: always() |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| |
| spark-test-spark35-ras: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Prepare |
| run: | |
| dnf module -y install python39 && \ |
| alternatives --set python3 /usr/bin/python3.9 && \ |
| pip3 install setuptools==77.0.3 && \ |
| pip3 install pyspark==3.5.5 cython && \ |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 |
| - name: Build and Run unit test for Spark 3.5.5 (other tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| export SPARK_SCALA_VERSION=2.12 |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \ |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest |
| - name: Upload test report |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| |
| spark-test-spark35-slow-ras: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Build and Run unit test for Spark 3.5.5 (slow tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \ |
| -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest |
| - name: Upload test report |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| |
| spark-test-spark35-smj: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Prepare |
| run: | |
| dnf module -y install python39 && \ |
| alternatives --set python3 /usr/bin/python3.9 && \ |
| pip3 install setuptools==77.0.3 && \ |
| pip3 install pyspark==3.5.5 cython && \ |
| pip3 install pandas==2.2.3 pyarrow==20.0.0 |
| - name: Build and Run unit test for Spark 3.5.5 (other tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| export SPARK_SCALA_VERSION=2.12 |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.sql.columnar.forceShuffledHashJoin=false" \ |
| -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest |
| - name: Upload test report |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| |
| spark-test-spark35-slow-smj: |
| needs: build-native-lib-centos-7 |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Download All Artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| name: velox-native-lib-centos-7-${{github.sha}} |
| path: ./cpp/build/releases |
| - name: Download Arrow Jars |
| uses: actions/download-artifact@v4 |
| with: |
| name: arrow-jars-centos-7-${{github.sha}} |
| path: /root/.m2/repository/org/apache/arrow/ |
| - name: Build and Run unit test for Spark 3.5.5 (slow tests) |
| run: | |
| cd $GITHUB_WORKSPACE/ |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.sql.columnar.forceShuffledHashJoin=false" \ |
| -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest |
| - name: Upload test report |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: | |
| **/target/*.log |
| **/gluten-ut/**/hs_err_*.log |
| **/gluten-ut/**/core.* |
| |
| cpp-test-udf-test: |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-8-jdk8 |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Get Ccache |
| uses: actions/cache/restore@v4 |
| with: |
| path: '${{ env.CCACHE_DIR }}' |
| key: ccache-centos8-release-shared-${{runner.arch}}-${{github.sha}} |
| restore-keys: | |
| ccache-centos8-release-shared-${{runner.arch}} |
| - name: Build Gluten native libraries |
| run: | |
| df -a |
| bash dev/ci-velox-buildshared-centos-8.sh |
| ccache -s |
| - name: Run CPP unit test |
| run: | |
| cd ./cpp/build && ctest -V |
| - name: Run CPP benchmark test |
| run: | |
| $MVN_CMD test -Pspark-3.5 -Pbackends-velox -pl backends-velox -am \ |
| -DtagsToInclude="org.apache.gluten.tags.GenerateExample" -Dtest=none -DfailIfNoTests=false -Dexec.skip |
| # This test depends on files generated by the above mvn test. |
| ./cpp/build/velox/benchmarks/generic_benchmark --with-shuffle --partitioning hash --threads 1 --iterations 1 \ |
| --conf $(realpath backends-velox/generated-native-benchmark/conf_12_0_*.ini) \ |
| --plan $(realpath backends-velox/generated-native-benchmark/plan_12_0_*.json) \ |
| --data $(realpath backends-velox/generated-native-benchmark/data_12_0_*_0.parquet),$(realpath backends-velox/generated-native-benchmark/data_12_0_*_1.parquet) |
| - name: Run UDF test |
| run: | |
| # Depends on --build_example=ON. |
| yum install -y java-17-openjdk-devel |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk |
| export PATH=$JAVA_HOME/bin:$PATH |
| java -version |
| $MVN_CMD test -Pspark-3.5 -Pbackends-velox -Pjava-17 -Piceberg -Pdelta -Ppaimon -DtagsToExclude=org.apache.gluten.tags.EnhancedFeaturesTest \ |
| -DtagsToInclude=org.apache.gluten.tags.UDFTest \ |
| -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" |
| - name: Upload test report |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-report |
| path: '**/surefire-reports/TEST-*.xml' |
| - name: Upload unit tests log files |
| if: ${{ !success() }} |
| uses: actions/upload-artifact@v4 |
| with: |
| name: ${{ github.job }}-test-log |
| path: "**/target/*.log" |
| |
| build-cudf-centos-9: |
| runs-on: ubuntu-22.04 |
| container: apache/gluten:centos-9-jdk8-cudf |
| steps: |
| - uses: actions/checkout@v2 |
| - name: Get Ccache |
| uses: actions/cache/restore@v4 |
| with: |
| path: '${{ env.CCACHE_DIR }}' |
| key: ccache-centos9-release-shared-${{runner.arch}}-${{github.sha}} |
| restore-keys: | |
| ccache-centos9-release-shared-${{runner.arch}} |
| - name: Build Gluten native libraries |
| run: | |
| df -a |
| bash dev/buildbundle-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --spark_version=3.4 --enable_gpu=ON |
| ccache -s |