# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Workflow that runs the Python cost benchmarks on Dataflow once a week.
name: Python Cost Benchmarks Dataflow

on:
  # Manual trigger, with no inputs declared.
  workflow_dispatch:
  # Scheduled trigger: 6:30 pm UTC on Saturdays.
  schedule:
    - cron: "30 18 * * 6"

# Token permissions are declared explicitly so the workflow does not fall back
# to the repository's default token permissions. Every scope listed here is
# read-only except `actions` (write); `id-token` is disabled.
permissions:
  actions: write
  checks: read
  contents: read
  deployments: read
  discussions: read
  id-token: none
  issues: read
  packages: read
  pages: read
  pull-requests: read
  repository-projects: read
  security-events: read
  statuses: read

# Runs that resolve to the same group key supersede one another: a newly
# queued run cancels the run of that group that is still in progress.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}"

# Repository secrets exported as environment variables at workflow level.
env:
  # Develocity / Gradle Enterprise cache credentials.
  DEVELOCITY_ACCESS_KEY: "${{ secrets.DEVELOCITY_ACCESS_KEY }}"
  GRADLE_ENTERPRISE_CACHE_USERNAME: "${{ secrets.GE_CACHE_USERNAME }}"
  GRADLE_ENTERPRISE_CACHE_PASSWORD: "${{ secrets.GE_CACHE_PASSWORD }}"
  # InfluxDB credentials.
  # NOTE(review): presumably used to publish benchmark metrics - confirm.
  INFLUXDB_USER: "${{ secrets.INFLUXDB_USER }}"
  INFLUXDB_USER_PASSWORD: "${{ secrets.INFLUXDB_USER_PASSWORD }}"

jobs:
  # Single job: checks out the repository, sets up Python, loads the pipeline
  # option files, then runs each benchmark through the load-test Gradle task.
  beam_Python_Cost_Benchmarks_Dataflow:
    # Manual dispatches always run; scheduled runs only in apache/beam.
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.repository == 'apache/beam')
    runs-on: [self-hosted, ubuntu-20.04, main]
    # Upper bound for the whole job; each benchmark step has its own limit.
    timeout-minutes: 900
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
    strategy:
      matrix:
        job_name: ["beam_Python_CostBenchmark_Dataflow"]
        job_phrase: ["Run Python Dataflow Cost Benchmarks"]
    steps:
      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
      - name: Setup Python environment
        uses: ./.github/actions/setup-environment-action
        with:
          python-version: '3.10'
      - name: Prepare test arguments
        uses: ./.github/actions/test-arguments-action
        with:
          test-type: load
          test-language: python
          argument-file-paths: |
            ${{ github.workspace }}/.github/workflows/cost-benchmarks-pipeline-options/python_wordcount.txt
            ${{ github.workspace }}/.github/workflows/cost-benchmarks-pipeline-options/python_tf_mnist_classification.txt
      # The env variables are created and populated in the test-arguments-action as "<github.job>_test_arguments_<argument_file_paths_index>"
      # The steps below read index 1 for wordcount and index 2 for the MNIST benchmark.
      # Timestamp (month, day, hour, minute, second in UTC) reused by the steps
      # below as a suffix for Dataflow job names and output paths.
      - name: get current time
        # GITHUB_ENV is quoted so the redirect target is never word-split.
        run: echo "NOW_UTC=$(date '+%m%d%H%M%S' --utc)" >> "$GITHUB_ENV"
      - name: Run wordcount on Dataflow
        uses: ./.github/actions/gradle-command-self-hosted-action
        timeout-minutes: 30
        with:
          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
          arguments: |
            -PloadTest.mainClass=apache_beam.testing.benchmarks.wordcount.wordcount \
            -Prunner=DataflowRunner \
            -PpythonVersion=3.10 \
            -PloadTest.requirementsTxtFile=apache_beam/testing/benchmarks/wordcount/requirements.txt \
            '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-wordcount-python-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/wordcount/result_wordcount-${{env.NOW_UTC}}.txt' \
      - name: Run Tensorflow MNIST Image Classification on Dataflow
        uses: ./.github/actions/gradle-command-self-hosted-action
        timeout-minutes: 30
        with:
          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
          arguments: |
            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.tensorflow_mnist_classification_cost_benchmark \
            -Prunner=DataflowRunner \
            -PpythonVersion=3.10 \
            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/tensorflow_tests_requirements.txt \
            '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-tf-mnist-classification-python-${{env.NOW_UTC}} --input_file=gs://apache-beam-ml/testing/inputs/it_mnist_data.csv --output_file=gs://temp-storage-for-end-to-end-tests/inference/result_tf_mnist-${{env.NOW_UTC}}.txt --model=gs://apache-beam-ml/models/tensorflow/mnist/' \