# Manually triggered workflow that builds Spark, runs the selected benchmark
# class(es) split across parallel jobs, and uploads the changed benchmark
# result files of every split as a tar artifact.
name: Run benchmarks

on:
  workflow_dispatch:
    inputs:
      class:
        description: 'Benchmark class'
        required: true
        default: '*'
      jdk:
        description: 'JDK version: 8 or 11'
        required: true
        default: '8'
      failfast:
        description: 'Failfast: true or false'
        required: true
        default: 'true'
      num-splits:
        description: 'Number of job splits'
        required: true
        default: '1'

jobs:
  # Produces the JSON array [1,2,...,num-splits] consumed by the matrix of the
  # `benchmark` job below.
  matrix-gen:
    name: Generate matrix for job splits
    runs-on: ubuntu-20.04
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    env:
      SPARK_BENCHMARK_NUM_SPLITS: ${{ github.event.inputs.num-splits }}
    steps:
      - name: Generate matrix
        id: set-matrix
        # Write the output through $GITHUB_OUTPUT; the `::set-output` workflow
        # command is deprecated.
        run: |
          echo "matrix=[$(seq -s, 1 "$SPARK_BENCHMARK_NUM_SPLITS")]" >> "$GITHUB_OUTPUT"

  # One job per split; each job runs the benchmarks with its split number and
  # uploads its own results artifact.
  benchmark:
    name: "Run benchmarks: ${{ github.event.inputs.class }} (JDK ${{ github.event.inputs.jdk }}, ${{ matrix.split }} out of ${{ github.event.inputs.num-splits }} splits)"
    needs: matrix-gen
    # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
    runs-on: ubuntu-20.04
    strategy:
      # The splits are independent jobs: one failing split does not cancel the
      # others. The `failfast` input is passed to the benchmark run through
      # SPARK_BENCHMARK_FAILFAST instead.
      fail-fast: false
      matrix:
        split: ${{fromJSON(needs.matrix-gen.outputs.matrix)}}
    env:
      SPARK_BENCHMARK_FAILFAST: ${{ github.event.inputs.failfast }}
      SPARK_BENCHMARK_NUM_SPLITS: ${{ github.event.inputs.num-splits }}
      SPARK_BENCHMARK_CUR_SPLIT: ${{ matrix.split }}
      SPARK_GENERATE_BENCHMARK_FILES: '1'
      SPARK_LOCAL_IP: localhost
    steps:
      - name: Checkout Spark repository
        uses: actions/checkout@v2
        # In order to get diff files
        with:
          fetch-depth: 0
      - name: Cache Scala, SBT and Maven
        uses: actions/cache@v2
        with:
          path: |
            build/apache-maven-*
            build/scala-*
            build/*.jar
            ~/.sbt
          key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
          restore-keys: |
            build-
      - name: Cache Coursier local repository
        uses: actions/cache@v2
        with:
          path: ~/.cache/coursier
          key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
          restore-keys: |
            benchmark-coursier-${{ github.event.inputs.jdk }}
      - name: Install Java ${{ github.event.inputs.jdk }}
        uses: actions/setup-java@v1
        with:
          java-version: ${{ github.event.inputs.jdk }}
      - name: Run benchmarks
        # User-supplied inputs are handed to the script as environment
        # variables rather than being expanded into the script text, so their
        # content cannot be interpreted as shell syntax.
        env:
          BENCHMARK_CLASS: ${{ github.event.inputs.class }}
          BENCHMARK_JDK: ${{ github.event.inputs.jdk }}
        run: |
          ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl test:package
          # Make less noisy
          cp conf/log4j.properties.template conf/log4j.properties
          sed -i 's/log4j.rootCategory=INFO, console/log4j.rootCategory=WARN, console/g' conf/log4j.properties
          # In benchmark, we use local as master so set driver memory only. Note that GitHub Actions has 7 GB memory limit.
          bin/spark-submit \
            --driver-memory 6g --class org.apache.spark.benchmark.Benchmarks \
            --jars "$(find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -)" \
            "$(find . -name 'spark-core*-SNAPSHOT-tests.jar')" \
            "$BENCHMARK_CLASS"
          # To keep the directory structure and file permissions, tar them
          # See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
          echo "Preparing the benchmark results:"
          # Archive modified tracked files plus newly created (untracked,
          # non-ignored) files, so results written for the first time are
          # included as well. The substitutions are intentionally unquoted so
          # that each file name becomes a separate tar argument.
          tar -cvf "benchmark-results-$BENCHMARK_JDK.tar" $(git diff --name-only) $(git ls-files --others --exclude-standard)
      - name: Upload benchmark results
        uses: actions/upload-artifact@v2
        with:
          name: benchmark-results-${{ github.event.inputs.jdk }}-${{ matrix.split }}
          path: benchmark-results-${{ github.event.inputs.jdk }}.tar