Merge pull request #14551: Cherrypick #14325 to 2.29.0 release branch: GitHub Actions changes to build RC wheels and deploy

diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 8dff639..1583d5a 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -59,6 +59,9 @@
   build_source:
     runs-on: ubuntu-latest
     name: Build python source distribution
+    outputs:
+      is_rc: ${{ steps.is_rc.outputs.is_rc }}
+      rc_num: ${{ steps.get_rc_version.outputs.RC_NUM }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v2
@@ -71,6 +74,24 @@
         run: python -m pip install -r build-requirements.txt
       - name: Install wheels
         run: python -m pip install wheel
+      - name: Get tag
+        id: get_tag
+        run: |
+          echo ::set-output name=TAG::${GITHUB_REF#refs/*/}
+      - name: Check whether an -RC tag was applied to the commit.
+        id: is_rc  # output is_rc is 1 when the ref name contains "-RC<char>", else 0
+        run: |
+          echo "${GITHUB_REF#refs/*/}" > temp  # ref read from the environment, never pasted into the script text
+          OUTPUT=$( if  grep -e '-RC.' -q temp; then echo 1; else echo 0; fi)
+          echo "::set-output name=is_rc::$OUTPUT"
+      - name: Get RELEASE_VERSION and RC_NUM
+        if: steps.is_rc.outputs.is_rc == 1
+        id: get_rc_version  # parses ./temp (the tag name written by the is_rc step)
+        run: |
+          RC_NUM=$(sed -n "s/^.*-RC\([0-9]*\)/\1/p" temp)
+          RELEASE_VERSION=$(sed -n "s/^v\(.*\)-RC[0-9]*$/\1/p" temp)  # strip ALL RC digits (RC10 -> 2.29.0, not 2.29.00)
+          echo "::set-output name=RC_NUM::$RC_NUM"
+          echo "::set-output name=RELEASE_VERSION::$RELEASE_VERSION"
       - name: Build source
         working-directory: ./sdks/python
         run: python setup.py sdist --formats=zip
@@ -95,6 +116,50 @@
         with:
           name: source_zip
           path: sdks/python/dist
+      - name: Clear dist
+        if: steps.is_rc.outputs.is_rc == 1
+        working-directory: ./sdks/python
+        run: |
+          rm -r ./dist
+          rm -rd apache-beam-source
+      - name: Rewrite SDK version to include RC number
+        if: steps.is_rc.outputs.is_rc == 1
+        working-directory: ./sdks/python
+        run: |
+          RELEASE_VERSION=${{ steps.get_rc_version.outputs.RELEASE_VERSION }}
+          RC_NUM=${{ steps.get_rc_version.outputs.RC_NUM }}
+          sed -i -e "s/${RELEASE_VERSION}/${RELEASE_VERSION}rc${RC_NUM}/g" apache_beam/version.py
+      - name: Build RC source
+        if: steps.is_rc.outputs.is_rc == 1
+        working-directory: ./sdks/python
+        run: python setup.py sdist --formats=zip
+      - name: Add RC checksums
+        if: steps.is_rc.outputs.is_rc == 1
+        working-directory: ./sdks/python/dist
+        run: |
+          file=$(ls | grep .zip | head -n 1)
+          sha512sum $file > ${file}.sha512
+      - name: Unzip RC source
+        if: steps.is_rc.outputs.is_rc == 1
+        working-directory: ./sdks/python
+        run: unzip dist/$(ls dist | grep .zip | head -n 1)
+      - name: Rename RC source directory
+        if: steps.is_rc.outputs.is_rc == 1
+        working-directory: ./sdks/python
+        run: mv $(ls | grep apache-beam) apache-beam-source-rc
+      - name: Upload RC source as artifact
+        if: steps.is_rc.outputs.is_rc == 1
+        uses: actions/upload-artifact@v2
+        with:
+          name: source_rc${{ steps.get_rc_version.outputs.RC_NUM }}
+          path: sdks/python/apache-beam-source-rc
+      - name: Upload compressed RC sources as artifacts
+        if: steps.is_rc.outputs.is_rc == 1
+        uses: actions/upload-artifact@v2
+        with:
+          name: source_zip_rc${{ steps.get_rc_version.outputs.RC_NUM }}
+          path: sdks/python/dist
+
 
   prepare_gcs:
     name: Prepare GCS
@@ -150,6 +215,12 @@
       with:
         name: source
         path: apache-beam-source
+    - name: Download Python SDK RC source distribution from artifacts
+      if: ${{ needs.build_source.outputs.is_rc == 1 }}
+      uses: actions/download-artifact@v2
+      with:
+        name: source_rc${{ needs.build_source.outputs.rc_num }}
+        path: apache-beam-source-rc
     - name: Install Python
       uses: actions/setup-python@v2
       with:
@@ -178,6 +249,28 @@
       with:
         name: wheelhouse-${{ matrix.os_python.os }}
         path: apache-beam-source/wheelhouse/
+    - name: Build RC wheels
+      if: ${{ needs.build_source.outputs.is_rc == 1 }}
+      working-directory: apache-beam-source-rc
+      env:
+        CIBW_BUILD: ${{ matrix.os_python.python }}
+        CIBW_BEFORE_BUILD: pip install cython
+      run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse
+      shell: bash
+    - name: Add RC checksums
+      if: ${{ needs.build_source.outputs.is_rc == 1 }}
+      working-directory: apache-beam-source-rc/wheelhouse/
+      run: |
+        for file in *.whl; do
+          sha512sum $file > ${file}.sha512
+        done
+      shell: bash
+    - name: Upload RC wheels as artifacts
+      if: ${{ needs.build_source.outputs.is_rc == 1 }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: wheelhouse-rc${{ needs.build_source.outputs.rc_num }}-${{ matrix.os_python.os }}
+        path: apache-beam-source-rc/wheelhouse/
 
   upload_wheels_to_gcs:
     name: Upload wheels to GCS bucket
diff --git a/release/src/main/scripts/choose_rc_commit.sh b/release/src/main/scripts/choose_rc_commit.sh
old mode 100644
new mode 100755
diff --git a/release/src/main/scripts/deploy_release_candidate_pypi.sh b/release/src/main/scripts/deploy_release_candidate_pypi.sh
new file mode 100755
index 0000000..bf13fd6
--- /dev/null
+++ b/release/src/main/scripts/deploy_release_candidate_pypi.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+#
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+
+# This script deploys a release candidate to PyPI. It will:
+# 1. Download python binary artifacts
+# 2. Deploy Release Candidate to pypi
+
+set -e
+
+function usage() {
+  echo 'Usage: deploy_release_candidate_pypi.sh --release <version> --rc <rc> --user <user> [--deploy]'
+}
+
+RELEASE=
+RC_NUMBER=
+COMMIT=
+USER_GITHUB_ID=
+DEPLOY=no
+BEAM_ROOT_DIR=beam
+GIT_REPO_BASE_URL=apache/beam
+GIT_REPO_URL=https://github.com/${GIT_REPO_BASE_URL}
+
+while [[ $# -gt 0 ]] ; do
+  arg="$1"
+
+  case $arg in
+      --release)
+      shift
+      RELEASE=$1
+      shift
+      ;;
+
+      --rc)
+      shift
+      RC_NUMBER=$1
+      shift
+      ;;
+
+      --user)
+      shift
+      USER_GITHUB_ID=$1
+      shift
+      ;;
+
+      --deploy)
+      DEPLOY=yes
+      shift
+      ;;
+
+      *)
+      usage
+      exit 1
+      ;;
+   esac
+done
+
+if [[ -z "$RELEASE" ]] ; then
+  echo 'No release version supplied.'
+  usage
+  exit 1
+fi
+
+if [[ -z "$RC_NUMBER" ]] ; then
+  echo 'No RC number supplied'
+  usage
+  exit 1
+fi
+
+if [[ -z "$USER_GITHUB_ID" ]] ; then
+  echo 'No github user supplied.'
+  usage
+  exit 1
+fi
+
+function clean_up(){  # Asks the user, then optionally deletes the clone dir created under $HOME.
+  echo "Do you want to clean local clone repo ${LOCAL_CLONE_DIR}? [y|N]"
+  read -r confirmation
+  if [[ $confirmation = "y" ]]; then
+    cd ~
+    rm -rf -- "${LOCAL_CLONE_DIR}"  # quoted: the name is built from CLI arguments and must stay one path
+    echo "Cleaned up local repo."
+  fi
+}
+
+RC_TAG="v${RELEASE}-RC${RC_NUMBER}"
+LOCAL_CLONE_DIR="beam_release_${RC_TAG}"
+
+echo "================Checking Environment Variables=============="
+echo "will download artifacts for ${RC_TAG} built by github actions"
+echo "Please review the release version and confirm: [y|N]"
+read confirmation
+if [[ $confirmation != "y" ]]; then
+  echo "Please rerun this script and make sure you have the right inputs."
+  exit
+fi
+
+echo "=====================Clear folder=============================="
+cd ~
+if [[ -d ${LOCAL_CLONE_DIR} ]]; then
+  echo "Deleting existing local clone repo ${LOCAL_CLONE_DIR}."
+  rm -rf "${LOCAL_CLONE_DIR}"
+fi
+mkdir "${LOCAL_CLONE_DIR}"
+LOCAL_CLONE_DIR_ROOT=$(pwd)/${LOCAL_CLONE_DIR}
+cd $LOCAL_CLONE_DIR
+
+echo "===================Cloning Beam Release Branch=================="
+git clone --depth 1 --branch "${RC_TAG}" ${GIT_REPO_URL} "${BEAM_ROOT_DIR}"
+cd $BEAM_ROOT_DIR
+RELEASE_COMMIT=$(git rev-list -n 1 $RC_TAG)
+
+echo "================Download python artifacts======================"
+PYTHON_ARTIFACTS_DIR="${LOCAL_CLONE_DIR_ROOT}/python"
+python "./release/src/main/scripts/download_github_actions_artifacts.py" \
+  --github-user "${USER_GITHUB_ID}" \
+  --repo-url "${GIT_REPO_BASE_URL}" \
+  --release-branch "${RC_TAG}" \
+  --release-commit "${RELEASE_COMMIT}" \
+  --artifacts_dir "${PYTHON_ARTIFACTS_DIR}" \
+  --rc_number "${RC_NUMBER}"
+
+cd "${PYTHON_ARTIFACTS_DIR}"
+
+echo "------Checking Hash Value for apache-beam-${RELEASE}rc${RC_NUMBER}.zip-----"
+sha512sum -c "apache-beam-${RELEASE}rc${RC_NUMBER}.zip.sha512"
+
+for artifact in *.whl; do
+  echo "----------Checking Hash Value for ${artifact} wheel-----------"
+  sha512sum -c "${artifact}.sha512"
+done
+
+echo "===================Removing sha512 files======================="
+rm -- *.sha512  # glob instead of parsing ls output; checksums were verified above
+
+echo "====================Upload rc to pypi========================"
+virtualenv deploy_pypi_env
+source ./deploy_pypi_env/bin/activate
+pip install twine
+# Separate commands, not an && chain: `set -e` ignores failures inside && lists.
+mkdir dist; mv -- apache* dist; cd dist
+echo "Will upload the following files to PyPI:"
+ls
+echo "Are the files listed correct? [y|N]"
+read confirmation
+if [[ $confirmation != "y" ]]; then
+  echo "Exiting without deploying artifacts to PyPI."
+  clean_up
+  exit
+fi
+
+if [[ "$DEPLOY" == yes ]] ; then
+  twine upload *
+else
+  echo "Skipping deployment to PyPI. Run the script with --deploy to stage the artifacts."
+fi
+
+clean_up
\ No newline at end of file
diff --git a/release/src/main/scripts/download_github_actions_artifacts.py b/release/src/main/scripts/download_github_actions_artifacts.py
index 3453fa0..3b83995 100644
--- a/release/src/main/scripts/download_github_actions_artifacts.py
+++ b/release/src/main/scripts/download_github_actions_artifacts.py
@@ -38,7 +38,7 @@
 def parse_arguments():
   """
   Gets all neccessary data from the user by parsing arguments or asking for input.
-  Return: github_token, user_github_id, repo_url, release_branch, release_commit, artifacts_dir
+  Return: github_token, user_github_id, repo_url, release_branch, release_commit, artifacts_dir, rc_number
   """
   parser = argparse.ArgumentParser(
       description=
@@ -49,6 +49,7 @@
   parser.add_argument("--release-branch", required=True)
   parser.add_argument("--release-commit", required=True)
   parser.add_argument("--artifacts_dir", required=True)
+  parser.add_argument("--rc_number", required=False, default="")
 
   args = parser.parse_args()
   github_token = ask_for_github_token()
@@ -66,8 +67,9 @@
   release_commit = args.release_commit
   artifacts_dir = args.artifacts_dir if os.path.isabs(args.artifacts_dir) \
     else os.path.abspath(args.artifacts_dir)
+  rc_number = args.rc_number
 
-  return github_token, user_github_id, repo_url, release_branch, release_commit, artifacts_dir
+  return github_token, user_github_id, repo_url, release_branch, release_commit, artifacts_dir, rc_number
 
 
 def ask_for_github_token():
@@ -254,18 +256,27 @@
   os.makedirs(artifacts_dir)
 
 
-def fetch_github_artifacts(run_id, repo_url, artifacts_dir, github_token):
+def filter_artifacts(artifacts, rc_number):
+  """Selects the source-zip and wheelhouse artifacts: RC ones if rc_number is set, else non-RC."""
+  def filter_source(artifact_name):
+    if rc_number:  # exact name, so that RC 1 does not also select RC 10
+      return artifact_name == "source_zip_rc{}".format(rc_number)
+    return artifact_name.startswith("source_zip") and "_rc" not in artifact_name
+
+  def filter_wheels(artifact_name):
+    if rc_number:  # trailing "-" terminates the RC number: wheelhouse-rc<N>-<os>
+      return artifact_name.startswith("wheelhouse-rc{}-".format(rc_number))
+    return artifact_name.startswith("wheelhouse") and "-rc" not in artifact_name
+  return [a for a in artifacts if (filter_source(a["name"]) or filter_wheels(a["name"]))]
+
+def fetch_github_artifacts(run_id, repo_url, artifacts_dir, github_token, rc_number):
   """Downloads and extracts github artifacts with source dist and wheels from given run."""
   print("Starting downloading artifacts ... (it may take a while)")
   run_data = get_single_workflow_run_data(run_id, repo_url, github_token)
   artifacts_url = safe_get(run_data, "artifacts_url")
   data_artifacts = request_url(artifacts_url, github_token)
   artifacts = safe_get(data_artifacts, "artifacts", artifacts_url)
-  filtered_artifacts = [
-      a for a in artifacts if (
-          a["name"].startswith("source_zip") or
-          a["name"].startswith("wheelhouse"))
-  ]
+  filtered_artifacts = filter_artifacts(artifacts, rc_number)
   for artifact in filtered_artifacts:
     url = safe_get(artifact, "archive_download_url")
     name = safe_get(artifact, "name")
@@ -311,6 +322,7 @@
       release_branch,
       release_commit,
       artifacts_dir,
+      rc_number,
   ) = parse_arguments()
 
   try:
@@ -319,7 +331,7 @@
         workflow_id, repo_url, release_branch, release_commit, github_token)
     validate_run(run_id, repo_url, github_token)
     prepare_directory(artifacts_dir)
-    fetch_github_artifacts(run_id, repo_url, artifacts_dir, github_token)
+    fetch_github_artifacts(run_id, repo_url, artifacts_dir, github_token, rc_number)
     print("Script finished successfully!")
     print(f"Artifacts available in directory: {artifacts_dir}")
   except KeyboardInterrupt as e:
diff --git a/website/www/site/content/en/contribute/release-guide.md b/website/www/site/content/en/contribute/release-guide.md
index 89cf1ed..6105228 100644
--- a/website/www/site/content/en/contribute/release-guide.md
+++ b/website/www/site/content/en/contribute/release-guide.md
@@ -538,7 +538,7 @@
       ./beam/release/src/main/scripts/choose_rc_commit.sh \
           --release "${RELEASE_VERSION}" \
           --rc "${RC_NUM}" \
-          --commit "${COMMIT_REF}" \
+	  --commit "${COMMIT_REF}" \
           --clone \
           --push-tag
 
@@ -598,6 +598,35 @@
          Artifact names should follow [the existing format](https://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.beam%22) in which artifact name mirrors directory structure, e.g., `beam-sdks-java-io-kafka`.
          Carefully review any new artifacts.
 
+### Upload release candidate to PyPI
+
+* **Script:** [deploy_release_candidate_pypi.sh](https://github.com/apache/beam/blob/master/release/src/main/scripts/deploy_release_candidate_pypi.sh)
+
+* **Usage**
+
+		./release/src/main/scripts/deploy_release_candidate_pypi.sh \
+		    --release "${RELEASE_VERSION}" \
+		    --rc "${RC_NUM}" \
+		    --user "${GITHUB_USER}" \
+		    --deploy
+
+* **The script will:**
+	1. Download python binary artifacts
+	1. Deploy release candidate to PyPI
+
+__Attention:__ Verify that:
+* The file names include the `rc#` version suffix (for example `2.29.0rc1`)
+* [Download Files](https://pypi.org/project/apache-beam/#files) have:
+  * All wheels uploaded as artifacts
+  * Release source's zip published
+  * Signatures and hashes do not need to be uploaded
+
+You can do a dry run by omitting the `--deploy` flag. Then it will only download the release candidate binaries. If it looks good, rerun it with `--deploy`.
+
+See the source of the script for more details or to run commands manually in case of a problem.
+
+
+
 **********