# .github/workflows/benchmark.yml - spark - Git at Google

 # Manually dispatched workflow that runs the Spark benchmarks.
 name: Run benchmarks

 # Started by hand only. Every input is required and ships with a default.
 on:
   workflow_dispatch:
     inputs:
       # Forwarded as the last argument of the spark-submit call.
       class:
         description: "Benchmark class"
         default: "*"
         required: true
       # Java version to install; also part of the cache key and artifact name.
       jdk:
         description: "JDK version: 8 or 11"
         default: "8"
         required: true
       # Exported to the benchmark job as SPARK_BENCHMARK_FAILFAST.
       failfast:
         description: "Failfast: true or false"
         default: "true"
         required: true
       # Upper bound of the generated split matrix (1..num-splits).
       num-splits:
         description: "Number of job splits"
         default: "1"
         required: true

 jobs:
   # Produces the JSON array [1,...,num-splits] that the benchmark job expands
   # into one matrix entry per split.
   matrix-gen:
     name: Generate matrix for job splits
     runs-on: ubuntu-20.04
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     env:
       SPARK_BENCHMARK_NUM_SPLITS: ${{ github.event.inputs.num-splits }}
     steps:
     - name: Generate matrix
       id: set-matrix
       # The step output is written to the $GITHUB_OUTPUT file; the legacy
       # `::set-output` workflow command is deprecated.
       # `seq -s, 1 N` prints "1,2,...,N", which is wrapped in brackets to form
       # a JSON array.
       run: |
         echo "matrix=[$(seq -s, 1 "$SPARK_BENCHMARK_NUM_SPLITS")]" >> "$GITHUB_OUTPUT"

   # One job per matrix split: runs the sbt `test:package` build, submits the
   # selected benchmark classes with spark-submit, and uploads a tarball of the
   # files `git diff` reports as changed afterwards.
   benchmark:
     name: "Run benchmarks: ${{ github.event.inputs.class }} (JDK ${{ github.event.inputs.jdk }}, ${{ matrix.split }} out of ${{ github.event.inputs.num-splits }} splits)"
     needs: matrix-gen
     # Pinned to the Ubuntu 20.04 LTS runner image.
     runs-on: ubuntu-20.04
     strategy:
       # Let the remaining splits finish even if one of them fails.
       fail-fast: false
       matrix:
         split: ${{fromJSON(needs.matrix-gen.outputs.matrix)}}
     env:
       SPARK_BENCHMARK_FAILFAST: ${{ github.event.inputs.failfast }}
       SPARK_BENCHMARK_NUM_SPLITS: ${{ github.event.inputs.num-splits }}
       SPARK_BENCHMARK_CUR_SPLIT: ${{ matrix.split }}
       SPARK_GENERATE_BENCHMARK_FILES: 1
       SPARK_LOCAL_IP: localhost
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
       # In order to get diff files
       with:
         fetch-depth: 0
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
       with:
         path: |
           build/apache-maven-*
           build/scala-*
           build/*.jar
           ~/.sbt
         key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
         restore-keys: |
           build-
     - name: Cache Coursier local repository
       uses: actions/cache@v2
       with:
         path: ~/.cache/coursier
         key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
         restore-keys: |
           benchmark-coursier-${{ github.event.inputs.jdk }}
     - name: Install Java ${{ github.event.inputs.jdk }}
       uses: actions/setup-java@v1
       with:
         java-version: ${{ github.event.inputs.jdk }}
     - name: Run benchmarks
       # The dispatch inputs reach the script as environment variables instead
       # of being spliced into the shell source by ${{ }} expansion, so a value
       # containing quotes or shell metacharacters cannot alter the commands.
       env:
         BENCHMARK_CLASS: ${{ github.event.inputs.class }}
         BENCHMARK_JDK: ${{ github.event.inputs.jdk }}
       run: |
         ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl test:package
         # Make less noisy
         cp conf/log4j.properties.template conf/log4j.properties
         sed -i 's/log4j.rootCategory=INFO, console/log4j.rootCategory=WARN, console/g' conf/log4j.properties
         # In benchmark, we use local as master so set driver memory only. Note that GitHub Actions has 7 GB memory limit.
         bin/spark-submit \
           --driver-memory 6g --class org.apache.spark.benchmark.Benchmarks \
           --jars "$(find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -)" \
           "$(find . -name 'spark-core*-SNAPSHOT-tests.jar')" \
           "$BENCHMARK_CLASS"
         # To keep the directory structure and file permissions, tar them
         # See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
         echo "Preparing the benchmark results:"
         # The file list is left unquoted on purpose: each changed path must
         # become its own tar argument.
         tar -cvf "benchmark-results-${BENCHMARK_JDK}.tar" $(git diff --name-only)
     - name: Upload benchmark results
       uses: actions/upload-artifact@v2
       with:
         # The split number keeps artifact names distinct across matrix jobs.
         name: benchmark-results-${{ github.event.inputs.jdk }}-${{ matrix.split }}
         path: benchmark-results-${{ github.event.inputs.jdk }}.tar
 # Second copy of the "Run benchmarks" workflow. It was stored with tab
 # indentation, escaped `|` indicators and stripped `*` globs; the structure is
 # restored here and kept as its own YAML document so that its top-level keys
 # do not duplicate the ones above.
---
 name: Run benchmarks

 # Started by hand only. Every input is required and ships with a default.
 on:
   workflow_dispatch:
     inputs:
       class:
         description: 'Benchmark class'
         required: true
         default: '*'
       jdk:
         description: 'JDK version: 8 or 11'
         required: true
         default: '8'
       failfast:
         description: 'Failfast: true or false'
         required: true
         default: 'true'
       num-splits:
         description: 'Number of job splits'
         required: true
         default: '1'

 jobs:
   # Produces the JSON array [1,...,num-splits] consumed by the benchmark
   # job's matrix.
   matrix-gen:
     name: Generate matrix for job splits
     runs-on: ubuntu-20.04
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     env:
       SPARK_BENCHMARK_NUM_SPLITS: ${{ github.event.inputs.num-splits }}
     steps:
     - name: Generate matrix
       id: set-matrix
       # Written to $GITHUB_OUTPUT; the `::set-output` command is deprecated.
       run: |
         echo "matrix=[$(seq -s, 1 "$SPARK_BENCHMARK_NUM_SPLITS")]" >> "$GITHUB_OUTPUT"

   # One job per matrix split: build, run the selected benchmarks, upload the
   # files `git diff` reports as changed.
   benchmark:
     name: "Run benchmarks: ${{ github.event.inputs.class }} (JDK ${{ github.event.inputs.jdk }}, ${{ matrix.split }} out of ${{ github.event.inputs.num-splits }} splits)"
     needs: matrix-gen
     # Pinned to the Ubuntu 20.04 LTS runner image.
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
         split: ${{fromJSON(needs.matrix-gen.outputs.matrix)}}
     env:
       SPARK_BENCHMARK_FAILFAST: ${{ github.event.inputs.failfast }}
       SPARK_BENCHMARK_NUM_SPLITS: ${{ github.event.inputs.num-splits }}
       SPARK_BENCHMARK_CUR_SPLIT: ${{ matrix.split }}
       SPARK_GENERATE_BENCHMARK_FILES: 1
       SPARK_LOCAL_IP: localhost
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
       # In order to get diff files
       with:
         fetch-depth: 0
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
       with:
         path: |
           build/apache-maven-*
           build/scala-*
           build/*.jar
           ~/.sbt
         key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
         restore-keys: |
           build-
     - name: Cache Coursier local repository
       uses: actions/cache@v2
       with:
         path: ~/.cache/coursier
         key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
         restore-keys: |
           benchmark-coursier-${{ github.event.inputs.jdk }}
     - name: Install Java ${{ github.event.inputs.jdk }}
       uses: actions/setup-java@v1
       with:
         java-version: ${{ github.event.inputs.jdk }}
     - name: Run benchmarks
       # Inputs are handed to the script through the environment rather than
       # expanded into the shell source, so their content cannot alter the
       # commands.
       env:
         BENCHMARK_CLASS: ${{ github.event.inputs.class }}
         BENCHMARK_JDK: ${{ github.event.inputs.jdk }}
       run: |
         ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl test:package
         # Make less noisy
         cp conf/log4j.properties.template conf/log4j.properties
         sed -i 's/log4j.rootCategory=INFO, console/log4j.rootCategory=WARN, console/g' conf/log4j.properties
         # In benchmark, we use local as master so set driver memory only. Note that GitHub Actions has 7 GB memory limit.
         bin/spark-submit \
           --driver-memory 6g --class org.apache.spark.benchmark.Benchmarks \
           --jars "$(find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -)" \
           "$(find . -name 'spark-core*-SNAPSHOT-tests.jar')" \
           "$BENCHMARK_CLASS"
         # To keep the directory structure and file permissions, tar them
         # See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
         echo "Preparing the benchmark results:"
         # The file list is left unquoted on purpose: each changed path must
         # become its own tar argument.
         tar -cvf "benchmark-results-${BENCHMARK_JDK}.tar" $(git diff --name-only)
     - name: Upload benchmark results
       uses: actions/upload-artifact@v2
       with:
         name: benchmark-results-${{ github.event.inputs.jdk }}-${{ matrix.split }}
         path: benchmark-results-${{ github.event.inputs.jdk }}.tar