Remove pull_request_target workflow
diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml
deleted file mode 100644
index 8f7dd86..0000000
--- a/.github/workflows/build-images.yml
+++ /dev/null
@@ -1,403 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
----
-name: "Build Images"
-run-name: >
- Build images for ${{ github.event.pull_request.title }} ${{ github.event.pull_request._links.html.href }}
-on: # yamllint disable-line rule:truthy
- pull_request_target:
- branches:
- - main
-permissions:
- # all other permissions are set to none
- contents: read
- pull-requests: read
- packages: read
-env:
- ANSWER: "yes"
- DB_RESET: "true"
- VERBOSE: "true"
- GITHUB_REPOSITORY: ${{ github.repository }}
- GITHUB_USERNAME: ${{ github.actor }}
- # You can override CONSTRAINTS_GITHUB_REPOSITORY by setting a secret in your repo, but by default the
- # Airflow one is going to be used
- CONSTRAINTS_GITHUB_REPOSITORY: >-
- ${{ secrets.CONSTRAINTS_GITHUB_REPOSITORY != '' &&
- secrets.CONSTRAINTS_GITHUB_REPOSITORY || 'apache/airflow' }}
- # This token is a WRITE one - pull_request_target type of events always have the WRITE token
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- IMAGE_TAG: "${{ github.event.pull_request.head.sha || github.sha }}"
- USE_SUDO: "true"
- INCLUDE_SUCCESS_OUTPUTS: "true"
- # Version of Python used for reproducibility of the packages built
- # Python 3.8 tarfile produces different tarballs than Python 3.9+ tarfile, which is why we are forcing
- # Python 3.9 for all release preparation commands to make sure that the tarballs are reproducible
- # TODO: remove me when we switch to Python 3.9 as minimal version
- REPRODUCIBLE_PYTHON_VERSION: "3.9"
-
-concurrency:
- group: build-${{ github.event.pull_request.number || github.ref }}
- cancel-in-progress: true
-
-jobs:
- build-info:
- timeout-minutes: 10
- name: "Build Info"
- runs-on: 'ubuntu-22.04'
- env:
- TARGET_BRANCH: ${{ github.event.pull_request.base.ref }}
- outputs:
- python-versions: ${{ steps.selective-checks.outputs.python-versions }}
- upgrade-to-newer-dependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }}
- all-python-versions-list-as-string: >-
- ${{ steps.selective-checks.outputs.all-python-versions-list-as-string }}
- default-python-version: ${{ steps.selective-checks.outputs.default-python-version }}
- run-tests: ${{ steps.selective-checks.outputs.run-tests }}
- run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }}
- ci-image-build: ${{ steps.selective-checks.outputs.ci-image-build }}
- prod-image-build: ${{ steps.selective-checks.outputs.prod-image-build }}
- cache-directive: ${{ steps.selective-checks.outputs.cache-directive }}
- default-branch: ${{ steps.selective-checks.outputs.default-branch }}
- default-constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }}
- runs-on: ${{steps.selective-checks.outputs.runs-on}}
- is-self-hosted-runner: ${{ steps.selective-checks.outputs.is-self-hosted-runner }}
- is-committer-build: ${{ steps.selective-checks.outputs.is-committer-build }}
- is-airflow-runner: ${{ steps.selective-checks.outputs.is-airflow-runner }}
- is-amd-runner: ${{ steps.selective-checks.outputs.is-amd-runner }}
- is-arm-runner: ${{ steps.selective-checks.outputs.is-arm-runner }}
- is-vm-runner: ${{ steps.selective-checks.outputs.is-vm-runner }}
- is-k8s-runner: ${{ steps.selective-checks.outputs.is-k8s-runner }}
- chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }}
- target-commit-sha: "${{steps.discover-pr-merge-commit.outputs.target-commit-sha ||
- github.event.pull_request.head.sha ||
- github.sha
- }}"
- if: github.repository == 'apache/airflow'
- steps:
- - name: Discover PR merge commit
- id: discover-pr-merge-commit
- run: |
- TARGET_COMMIT_SHA="$(gh api '${{ github.event.pull_request.url }}' --jq .merge_commit_sha)"
- echo "TARGET_COMMIT_SHA=$TARGET_COMMIT_SHA" >> ${GITHUB_ENV}
- echo "target-commit-sha=${TARGET_COMMIT_SHA}" >> ${GITHUB_OUTPUT}
- if: github.event_name == 'pull_request_target'
- # The labels in the event aren't updated when re-triggering the job, so let's hit the API to get
- # up-to-date values
- - name: Get latest PR labels
- id: get-latest-pr-labels
- run: |
- echo -n "pull-request-labels=" >> ${GITHUB_OUTPUT}
- gh api graphql --paginate -F node_id=${{github.event.pull_request.node_id}} -f query='
- query($node_id: ID!, $endCursor: String) {
- node(id:$node_id) {
- ... on PullRequest {
- labels(first: 100, after: $endCursor) {
- nodes { name }
- pageInfo { hasNextPage endCursor }
- }
- }
- }
- }' --jq '.data.node.labels.nodes[]' | jq --slurp -c '[.[].name]' >> ${GITHUB_OUTPUT}
- if: github.event_name == 'pull_request_target'
- - name: Cleanup repo
- run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*"
- - uses: actions/checkout@v4
- with:
- ref: ${{ env.TARGET_COMMIT_SHA }}
- persist-credentials: false
- fetch-depth: 2
- ####################################################################################################
- # WE ONLY DO THAT CHECKOUT ABOVE TO RETRIEVE THE TARGET COMMIT AND ITS PARENT. DO NOT RUN ANY CODE
- # RIGHT AFTER THAT AS WE ARE GOING TO RESTORE THE TARGET BRANCH CODE IN THE NEXT STEP.
- ####################################################################################################
- - name: Checkout target branch to use ci/scripts and breeze from there.
- uses: actions/checkout@v4
- with:
- ref: ${{ github.base_ref }}
- persist-credentials: false
- ####################################################################################################
- # HERE EVERYTHING IS PERFECTLY SAFE TO RUN. AT THIS POINT WE HAVE THE TARGET BRANCH CHECKED OUT
- # AND WE CAN RUN ANY CODE FROM IT. WE CAN RUN BREEZE COMMANDS, WE CAN RUN SCRIPTS, WE CAN RUN
- # COMPOSITE ACTIONS. WE CAN RUN ANYTHING THAT IS IN THE TARGET BRANCH AND THERE IS NO RISK THAT
- # CODE WILL BE RUN FROM THE PR.
- ####################################################################################################
- - name: "Setup python"
- uses: actions/setup-python@v4
- with:
- python-version: 3.8
- - name: "Install Breeze"
- uses: ./.github/actions/breeze
- ####################################################################################################
- # WE RUN SELECTIVE CHECKS HERE USING THE TARGET COMMIT AND ITS PARENT TO BE ABLE TO COMPARE THEM
- # AND SEE WHAT HAS CHANGED IN THE PR. THE CODE IS STILL RUN FROM THE TARGET BRANCH, SO IT IS SAFE
- # TO RUN IT, WE ONLY PASS TARGET_COMMIT_SHA SO THAT SELECTIVE CHECKS CAN SEE WHAT'S COMING IN THE PR
- ####################################################################################################
- - name: Selective checks
- id: selective-checks
- env:
- PR_LABELS: "${{ steps.get-latest-pr-labels.outputs.pull-request-labels }}"
- COMMIT_REF: "${{ env.TARGET_COMMIT_SHA }}"
- VERBOSE: "false"
- AIRFLOW_SOURCES_ROOT: "${{ github.workspace }}"
- run: breeze ci selective-check 2>> ${GITHUB_OUTPUT}
- - name: env
- run: printenv
- env:
- PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pull-request-labels }}
- GITHUB_CONTEXT: ${{ toJson(github) }}
-
- build-ci-images:
- strategy:
- fail-fast: true
- matrix:
- python-version: ${{fromJson(needs.build-info.outputs.python-versions)}}
- permissions:
- contents: read
- packages: write
- timeout-minutes: 80
- name: Build CI image ${{ matrix.python-version }}
- runs-on: ["ubuntu-22.04"]
- needs: [build-info]
- if: |
- needs.build-info.outputs.ci-image-build == 'true' &&
- github.event.pull_request.head.repo.full_name != 'apache/airflow'
- env:
- DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }}
- DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }}
- RUNS_ON: "${{ needs.build-info.outputs.runs-on }}"
- BACKEND: sqlite
- VERSION_SUFFIX_FOR_PYPI: "dev0"
- steps:
- - name: Cleanup repo
- run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*"
- - uses: actions/checkout@v4
- with:
- ref: ${{ needs.build-info.outputs.target-commit-sha }}
- persist-credentials: false
- ####################################################################################################
- # BE VERY CAREFUL HERE, BETWEEN THIS LINE AND THE END OF THE WARNING! HERE WE CHECK OUT THE TARGET
- # COMMIT AND ITS PARENT TO BE ABLE TO COMPARE THEM BUT ALSO TO BE ABLE TO BUILD THE IMAGE FROM
- # THE INCOMING PR, RATHER THAN FROM TARGET BRANCH. THIS IS A SECURITY RISK, BECAUSE THE PR
- # CAN CONTAIN ANY CODE AND WE EXECUTE IT HERE. THEREFORE, WE NEED TO BE VERY CAREFUL WHAT WE
- # DO HERE. WE SHOULD NOT EXECUTE ANY CODE THAT COMES FROM THE PR. WE SHOULD NOT RUN ANY BREEZE
- # COMMAND NOR SCRIPTS NOR COMPOSITE ACTIONS. WE SHOULD ONLY RUN CODE THAT IS EMBEDDED DIRECTLY IN
- # THIS WORKFLOW - BECAUSE THIS IS THE ONLY CODE THAT WE CAN TRUST.
- ####################################################################################################
- - name: Checkout target branch to 'target-airflow' folder to use ci/scripts and breeze from there.
- uses: actions/checkout@v4
- with:
- path: "target-airflow"
- ref: ${{ github.base_ref }}
- persist-credentials: false
- if: needs.build-info.outputs.is-committer-build != 'true'
- - name: >
- Replace "scripts/ci", "dev" and ".github/actions" with the target branch
- so that the those directories are not coming from the PR
- shell: bash
- run: |
- echo
- echo -e "\033[33m Replace scripts, dev, actions with target branch for non-committer builds!\033[0m"
- echo
- rm -rfv "scripts/ci"
- mv -v "target-airflow/scripts/ci" "scripts"
- rm -rfv "dev"
- mv -v "target-airflow/dev" "."
- rm -rfv ".github/actions"
- mv -v "target-airflow/.github/actions" ".github"
- if: needs.build-info.outputs.is-committer-build != 'true'
- ####################################################################################################
- # HERE IT'S A BIT SAFER. THE `dev`, `scripts/ci` AND `.github/actions` ARE NOW COMING FROM THE
- # BASE_REF - WHICH IS THE TARGET BRANCH OF THE PR. WE CAN TRUST THAT THOSE SCRIPTS ARE SAFE TO RUN.
- # ALL THE REST OF THE CODE COMES FROM THE PR, AND FOR EXAMPLE THE CODE IN THE `Dockerfile.ci` CAN
- # BE RUN SAFELY AS PART OF DOCKER BUILD. BECAUSE IT RUNS INSIDE THE DOCKER CONTAINER AND IT IS
- # ISOLATED FROM THE RUNNER.
- ####################################################################################################
- - name: Build CI Image ${{ matrix.python-version }}:${{env.IMAGE_TAG}}
- uses: ./.github/actions/build-ci-images
- with:
- python-version: ${{ matrix.python-version }}
- env:
- UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }}
- DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }}
- PYTHON_VERSIONS: ${{needs.build-info.outputs.all-python-versions-list-as-string}}
- DEBUG_RESOURCES: ${{ needs.build-info.outputs.debug-resources }}
- BUILD_TIMEOUT_MINUTES: 70
-
- build-prod-images:
- strategy:
- fail-fast: true
- matrix:
- python-version: ${{fromJson(needs.build-info.outputs.python-versions)}}
- permissions:
- contents: read
- packages: write
- timeout-minutes: 80
- name: Build PROD image ${{ matrix.python-version }}
- runs-on: ["ubuntu-22.04"]
- needs: [build-info, build-ci-images]
- if: |
- needs.build-info.outputs.prod-image-build == 'true' &&
- github.event.pull_request.head.repo.full_name != 'apache/airflow'
- env:
- DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }}
- DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }}
- RUNS_ON: "${{ needs.build-info.outputs.runs-on }}"
- BACKEND: sqlite
- VERSION_SUFFIX_FOR_PYPI: "dev0"
- INCLUDE_NOT_READY_PROVIDERS: "true"
- steps:
- - name: Cleanup repo
- run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*"
- - uses: actions/checkout@v4
- with:
- ref: ${{ needs.build-info.outputs.target-commit-sha }}
- persist-credentials: false
- ####################################################################################################
- # BE VERY CAREFUL HERE, BETWEEN THIS LINE AND THE END OF THE WARNING! HERE WE CHECK OUT THE TARGET
- # COMMIT AND ITS PARENT TO BE ABLE TO COMPARE THEM BUT ALSO TO BE ABLE TO BUILD THE IMAGE FROM
- # THE INCOMING PR, RATHER THAN FROM TARGET BRANCH. THIS IS A SECURITY RISK, BECAUSE THE PR
- # CAN CONTAIN ANY CODE AND WE EXECUTE IT HERE. THEREFORE, WE NEED TO BE VERY CAREFUL WHAT WE
- # DO HERE. WE SHOULD NOT EXECUTE ANY CODE THAT COMES FROM THE PR. WE SHOULD NOT RUN ANY BREEZE
- # COMMAND NOR SCRIPTS NOR COMPOSITE ACTIONS. WE SHOULD ONLY RUN CODE THAT IS EMBEDDED DIRECTLY IN
- # THIS WORKFLOW - BECAUSE THIS IS THE ONLY CODE THAT WE CAN TRUST.
- ####################################################################################################
- - name: Checkout target branch to 'target-airflow' folder to use ci/scripts and breeze from there.
- uses: actions/checkout@v4
- with:
- path: "target-airflow"
- ref: ${{ github.base_ref }}
- persist-credentials: false
- if: needs.build-info.outputs.is-committer-build != 'true'
- - name: >
- Replace "scripts/ci", "dev" and ".github/actions" with the target branch
- so that the those directories are not coming from the PR
- shell: bash
- run: |
- echo
- echo -e "\033[33m Replace scripts, dev, actions with target branch for non-committer builds!\033[0m"
- echo
- rm -rfv "scripts/ci"
- mv -v "target-airflow/scripts/ci" "scripts"
- rm -rfv "dev"
- mv -v "target-airflow/dev" "."
- rm -rfv ".github/actions"
- mv -v "target-airflow/.github/actions" ".github"
- if: needs.build-info.outputs.is-committer-build != 'true'
- ####################################################################################################
- # HERE IT'S A BIT SAFER. THE `dev`, `scripts/ci` AND `.github/actions` ARE NOW COMING FROM THE
- # BASE_REF - WHICH IS THE TARGET BRANCH OF THE PR. WE CAN TRUST THAT THOSE SCRIPTS ARE SAFE TO RUN.
- # ALL THE REST OF THE CODE COMES FROM THE PR, AND FOR EXAMPLE THE CODE IN THE `Dockerfile.ci` CAN
- # BE RUN SAFELY AS PART OF DOCKER BUILD. BECAUSE IT RUNS INSIDE THE DOCKER CONTAINER AND IT IS
- # ISOLATED FROM THE RUNNER.
- ####################################################################################################
- - name: "Install Breeze"
- uses: ./.github/actions/breeze
- with:
- python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }}
- - name: Build PROD Image ${{ matrix.python-version }}:${{env.IMAGE_TAG}}
- uses: ./.github/actions/build-prod-images
- with:
- build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }}
- chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }}
- python-version: ${{ matrix.python-version }}
- env:
- UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }}
- DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }}
- PYTHON_VERSIONS: ${{needs.build-info.outputs.all-python-versions-list-as-string}}
- DEBUG_RESOURCES: ${{ needs.build-info.outputs.debug-resources }}
-
- build-ci-images-arm:
- timeout-minutes: 50
- name: "Build ARM CI images ${{needs.build-info.outputs.all-python-versions-list-as-string}}"
- runs-on: ${{fromJSON(needs.build-info.outputs.runs-on)}}
- needs: [build-info, build-prod-images]
- # We can change the job to run on ASF ARM runners and not start our own instance once ASF runners are enabled
- if: |
- needs.build-info.outputs.ci-image-build == 'true' &&
- needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' &&
- github.event.pull_request.head.repo.full_name != 'apache/airflow' &&
- needs.build-info.outputs.is-self-hosted-runner == 'true' &&
- needs.build-info.outputs.is-airflow-runner == 'true'
- env:
- DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }}
- DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }}
- RUNS_ON: "${{ needs.build-info.outputs.runs-on }}"
- BACKEND: sqlite
- outputs: ${{toJSON(needs.build-info.outputs) }}
- VERSION_SUFFIX_FOR_PYPI: "dev0"
- steps:
- - name: Cleanup repo
- run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*"
- - uses: actions/checkout@v4
- with:
- ref: ${{ needs.build-info.outputs.target-commit-sha }}
- persist-credentials: false
- ####################################################################################################
- # BE VERY CAREFUL HERE, BETWEEN THIS LINE AND THE END OF THE WARNING! HERE WE CHECK OUT THE TARGET
- # COMMIT AND ITS PARENT TO BE ABLE TO COMPARE THEM BUT ALSO TO BE ABLE TO BUILD THE IMAGE FROM
- # THE INCOMING PR, RATHER THAN FROM TARGET BRANCH. THIS IS A SECURITY RISK, BECAUSE THE PR
- # CAN CONTAIN ANY CODE AND WE EXECUTE IT HERE. THEREFORE, WE NEED TO BE VERY CAREFUL WHAT WE
- # DO HERE. WE SHOULD NOT EXECUTE ANY CODE THAT COMES FROM THE PR. WE SHOULD NOT RUN ANY BREEZE
- # COMMAND NOR SCRIPTS NOR COMPOSITE ACTIONS. WE SHOULD ONLY RUN CODE THAT IS EMBEDDED DIRECTLY IN
- # THIS WORKFLOW - BECAUSE THIS IS THE ONLY CODE THAT WE CAN TRUST.
- ####################################################################################################
- - name: Checkout target branch to 'target-airflow' folder to use ci/scripts and breeze from there.
- uses: actions/checkout@v4
- with:
- path: "target-airflow"
- ref: ${{ github.base_ref }}
- persist-credentials: false
- if: needs.build-info.outputs.is-committer-build != 'true'
- - name: >
- Replace "scripts/ci", "dev" and ".github/actions" with the target branch
- so that the those directories are not coming from the PR
- shell: bash
- run: |
- echo
- echo -e "\033[33m Replace scripts, dev, actions with target branch for non-committer builds!\033[0m"
- echo
- rm -rfv "scripts/ci"
- mv -v "target-airflow/scripts/ci" "scripts"
- rm -rfv "dev"
- mv -v "target-airflow/dev" "."
- rm -rfv ".github/actions"
- mv -v "target-airflow/.github/actions" ".github"
- if: needs.build-info.outputs.is-committer-build != 'true'
- ####################################################################################################
- # HERE IT'S A BIT SAFER. THE `dev`, `scripts/ci` AND `.github/actions` ARE NOW COMING FROM THE
- # BASE_REF - WHICH IS THE TARGET BRANCH OF THE PR. WE CAN TRUST THAT THOSE SCRIPTS ARE SAFE TO RUN.
- # ALL THE REST OF THE CODE COMES FROM THE PR, AND FOR EXAMPLE THE CODE IN THE `Dockerfile.ci` CAN
- # BE RUN SAFELY AS PART OF DOCKER BUILD. BECAUSE IT RUNS INSIDE THE DOCKER CONTAINER AND IT IS
- # ISOLATED FROM THE RUNNER.
- ####################################################################################################
- - name: "Start ARM instance"
- run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh
- - name: "Install Breeze"
- uses: ./.github/actions/breeze
- - name: >
- Build ARM CI images ${{ env.IMAGE_TAG }}
- ${{needs.build-info.outputs.all-python-versions-list-as-string}}
- run: >
- breeze ci-image build --run-in-parallel --builder airflow_cache --platform "linux/arm64"
- env:
- UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }}
- DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }}
- PYTHON_VERSIONS: ${{needs.build-info.outputs.all-python-versions-list-as-string}}
- - name: "Stop ARM instance"
- run: ./scripts/ci/images/ci_stop_arm_instance.sh
- if: always()