blob: 15979a9e1acd534577ffe2d07fb5a565b3152521 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# To learn more about GitHub Actions in Apache Beam check the CI.md
# Manually dispatched workflow; every knob below is supplied from the
# "Run workflow" form.
name: Run RC Validation
on:
  workflow_dispatch:
    inputs:
      # Release version; combined with RC_NUM to build the RC tag and branches.
      RELEASE_VER:
        description: 'Beam current Release Version'
        default: '2.42.0'
        required: true
      # GCS bucket referenced as the temp location by the Dataflow steps.
      USER_GCS_BUCKET:
        description: 'Bucket to upload results'
        default: 'gs://rc-validation-migration-tests'
        required: true
      # Release candidate ordinal (kept as a string).
      RC_NUM:
        description: 'RC number'
        default: '1'
        required: true
      # Repository URL used to resolve the RC's Java artifacts.
      APACHE_CONTENTS_REPO:
        description: 'Apache Contents Repository'
        default: 'https://repository.apache.org/content/repositories/orgapachebeam-1285'
        required: true
      # The four toggles below gate the optional jobs via their `if:` checks.
      RUN_SQL_TAXI_WITH_DATAFLOW:
        description: 'Include SQL Taxi with Dataflow?'
        type: boolean
        default: true
        required: true
      RUN_PYTHON_CROSS_VALIDATION:
        description: 'Include Python Cross Validation?'
        type: boolean
        default: true
        required: true
      RUN_DIRECT_RUNNER_TESTS:
        description: 'Include Direct Runner Leaderboard & Gamestates?'
        type: boolean
        default: true
        required: true
      RUN_DATAFLOW_RUNNER_TESTS:
        description: 'Include Dataflow Runner Leaderboard & Gamestates?'
        type: boolean
        default: true
        required: true
# Workflow-level environment: visible to the shell of every step in every job.
env:
  # Git tag of the release candidate, e.g. v2.42.0-RC1.
  RC_TAG: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}"
  RELEASE_VER: ${{github.event.inputs.RELEASE_VER}}
  USER_GCP_PROJECT: apache-beam-testing
  PYTHON_RC_DOWNLOAD_URL: https://dist.apache.org/repos/dist/dev/beam
  USER_GCP_REGION: us-central1
  USER_GCP_ZONE: us-central1-a
  # workflow_dispatch inputs are not exported to the shell automatically;
  # without this mapping `${USER_GCS_BUCKET}` in the Dataflow steps expands to
  # an empty string and --temp_location becomes "/temp/".
  USER_GCS_BUCKET: ${{github.event.inputs.USER_GCS_BUCKET}}
  APACHE_CONTENTS_REPO: ${{github.event.inputs.APACHE_CONTENTS_REPO}}
  # Passed to the game_stats examples as --fixed_window_duration.
  FIXED_WINDOW_DURATION: 20
jobs:
# Creates a throwaway branch from the RC tag, opens a PR against the release
# branch and comments on it with the "Run Python ReleaseCandidate" trigger
# phrase.
python_release_candidate:
  runs-on: [self-hosted, ubuntu-20.04]
  permissions:
    pull-requests: write
    contents: write
  env:
    # Consumed by the `gh` CLI for authentication.
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    RELEASE_BRANCH: "release-${{github.event.inputs.RELEASE_VER}}"
    WORKING_BRANCH: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}_validations"
  steps:
    - name: Check out code
      uses: actions/checkout@v4
      with:
        ref: ${{ env.RC_TAG }}
    - name: Setup GitHub CLI
      uses: ./.github/actions/setup-gh-cli-linux
    - name: Set git config
      run: |
        git config user.name "$GITHUB_ACTOR"
        git config user.email actions@"$RUNNER_NAME".local
    - name: Verify working branch name
      run: ./scripts/ci/ci_check_git_branch.sh "$WORKING_BRANCH"
    - name: Create Pull Request
      run: |
        git checkout -b ${{env.WORKING_BRANCH}} ${{ env.RC_TAG }} --quiet
        # An empty commit target so the branch differs from the release branch.
        touch empty_file.json
        git add empty_file.json
        git commit -m "Add empty file in order to create PR" --quiet
        git push origin ${{env.WORKING_BRANCH}} --quiet
        GITHUB_PR_URL=$(gh pr create -B ${{env.RELEASE_BRANCH}} -H ${{env.WORKING_BRANCH}} -t"[DO NOT MERGE] Run Python RC Validation Tests" -b "PR to run Python ReleaseCandidate Jenkins Job.")
        # Each step runs in a fresh shell, so a plain shell variable would be
        # empty in the next step; persist the PR URL through GITHUB_ENV.
        echo "GITHUB_PR_URL=${GITHUB_PR_URL}" >> "$GITHUB_ENV"
    - name: Comment on PR to Trigger Python ReleaseCandidate Test
      run: |
        gh pr comment "$GITHUB_PR_URL" --body "Run Python ReleaseCandidate"
# Cross-language SQL Taxi validation: creates a scratch Pub/Sub topic and
# subscription, runs the sql_taxi example with DataflowRunner under a 20 minute
# timeout, pulls messages from the subscription, then removes the resources.
sql_taxi_with_dataflow:
  if: github.event.inputs.RUN_SQL_TAXI_WITH_DATAFLOW == 'true'
  runs-on:
    - self-hosted
    - ubuntu-20.04
  strategy:
    matrix:
      py_version:
        - 3.8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ env.RC_TAG }}

    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.py_version }}

    - name: Setup Java JDK
      uses: actions/setup-java@v4
      with:
        java-version: 11
        distribution: temurin

    - name: Setting python env
      uses: ./.github/actions/common-rc-validation
      with:
        RELEASE_VER: ${{ env.RELEASE_VER }}
        PYTHON_RC_DOWNLOAD_URL: ${{ env.PYTHON_RC_DOWNLOAD_URL }}

    # Names are written to GITHUB_ENV so the following steps can read them.
    - name: Setting Taxi Variables
      run: |
        echo "SQL_TAXI_TOPIC=${GITHUB_ACTOR}_python_validations_$(date +%m%d)_$RANDOM" >> $GITHUB_ENV
        echo "SQL_TAXI_SUBSCRIPTION=${GITHUB_ACTOR}_python_validations_$(date +%m%d)_$RANDOM" >> $GITHUB_ENV
        echo "SQL_EXPANSION_SERVICE_JAR=${APACHE_CONTENTS_REPO}/org/apache/beam/beam-sdks-java-extensions-sql-expansion-service/${RELEASE_VER}/beam-sdks-java-extensions-sql-expansion-service-${RELEASE_VER}.jar" >> $GITHUB_ENV

    - name: Creating Pub/Sub Topics
      run: |
        gcloud pubsub topics create --project=${USER_GCP_PROJECT} ${SQL_TAXI_TOPIC}
        gcloud pubsub subscriptions create --project=${USER_GCP_PROJECT} --topic=${SQL_TAXI_TOPIC} ${SQL_TAXI_SUBSCRIPTION}

    # `|| true` keeps the step green whatever exit status the timed-out
    # pipeline reports.
    - name: XLang SQL Taxi with DataflowRunner
      run: |
        echo '***************************************************';
        echo '* Running Python XLang SQL Taxi with DataflowRunner';
        echo '***************************************************';
        timeout --preserve-status 20m python -m apache_beam.examples.sql_taxi \
          --project=${USER_GCP_PROJECT} \
          --region=${USER_GCP_REGION} \
          --runner DataflowRunner \
          --num_workers 5 \
          --output_topic projects/${USER_GCP_PROJECT}/topics/${SQL_TAXI_TOPIC} \
          --beam_services="{\":sdks:java:extensions:sql:expansion-service:shadowJar\": \"${SQL_EXPANSION_SERVICE_JAR}\"}" \
          --sdk_location apache-beam-${RELEASE_VER}.tar.gz || true

    - name: Checking Results
      run: |
        gcloud pubsub subscriptions pull --project=${USER_GCP_PROJECT} --limit=5 ${SQL_TAXI_SUBSCRIPTION}
        gcloud pubsub subscriptions pull --project=${USER_GCP_PROJECT} --limit=5 ${SQL_TAXI_SUBSCRIPTION}

    # Cleanup runs even when an earlier step failed.
    - name: Removing Pub Sub Topic
      if: always()
      run: |
        gcloud pubsub topics delete --project=${USER_GCP_PROJECT} ${SQL_TAXI_TOPIC}
        gcloud pubsub subscriptions delete --project=${USER_GCP_PROJECT} ${SQL_TAXI_SUBSCRIPTION}
# Cross-language Kafka Taxi validation: provisions a GKE cluster running Kafka,
# runs the kafka_taxi example with DataflowRunner into a scratch BigQuery
# dataset, prints the first rows, then tears the dataset and cluster down.
python_cross_validation:
  runs-on: [self-hosted,ubuntu-20.04]
  if: ${{github.event.inputs.RUN_PYTHON_CROSS_VALIDATION == 'true'}}
  strategy:
    matrix:
      # Quoted so the version is always treated as a string, never a float.
      py_version: ['3.8']
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{env.RC_TAG}}
    - name: Verify ENV values
      run: |
        echo ""
        echo "====================Checking Environment & Variables================="
        echo ""
        echo "running validations on release ${{github.event.inputs.RELEASE_VER}} RC${{github.event.inputs.RC_NUM}}."
    - name: Install Kubectl
      uses: azure/setup-kubectl@v3
    - name: Setup Java JDK
      uses: actions/setup-java@v4
      with:
        distribution: 'temurin'
        java-version: 11
    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{matrix.py_version}}
    - name: Setting python env
      uses: ./.github/actions/common-rc-validation
      with:
        RELEASE_VER: ${{env.RELEASE_VER}}
        PYTHON_RC_DOWNLOAD_URL: ${{env.PYTHON_RC_DOWNLOAD_URL}}
    - name: Installing gcloud-auth-plugin
      # -y: the runner has no interactive terminal, so apt-get would abort if
      # it stopped to ask for confirmation.
      run: sudo apt-get install -y google-cloud-sdk-gke-gcloud-auth-plugin
    - name: Setting Kafka Cluster Name
      run: |
        echo "KAFKA_CLUSTER_NAME=xlang-kafka-cluster-$RANDOM">> $GITHUB_ENV
    - name: Creating Kafka Cluster
      run: |
        gcloud container clusters create --project=${USER_GCP_PROJECT} --region=${USER_GCP_REGION} --no-enable-ip-alias $KAFKA_CLUSTER_NAME
        kubectl apply -R -f .test-infra/kubernetes/kafka-cluster
    - name: Waiting for Kafka cluster to be ready
      run: kubectl wait --for=condition=Ready pod/kafka-0 --timeout=1200s
    # This step only exports the values the pipeline step below consumes.
    - name: Start xlang Kafka Taxi with Dataflow Runner
      run: |
        echo "BOOTSTRAP_SERVERS=$(kubectl get svc outside-0 -o jsonpath='{.status.loadBalancer.ingress[0].ip}'):32400" >> $GITHUB_ENV
        echo "KAFKA_TAXI_DF_DATASET=${GITHUB_ACTOR}_python_validations_$(date +%m%d)_$RANDOM" >> $GITHUB_ENV
        echo "KAFKA_EXPANSION_SERVICE_JAR=${APACHE_CONTENTS_REPO}/org/apache/beam/beam-sdks-java-io-expansion-service/${RELEASE_VER}/beam-sdks-java-io-expansion-service-${RELEASE_VER}.jar" >> $GITHUB_ENV
    - name: Creating BigQuery Dataset
      run: bq mk --project_id=${USER_GCP_PROJECT} ${KAFKA_TAXI_DF_DATASET}
    # `|| true` keeps the step green whatever exit status the timed-out
    # pipeline reports.
    - name: Running Xlang Kafka Taxi with Dataflow Runner
      run: |
        echo '*****************************************************';
        echo '* Running Python XLang Kafka Taxi with DataflowRunner';
        echo '*****************************************************';
        echo "***************************************************************";
        timeout --preserve-status 20m python -m apache_beam.examples.kafkataxi.kafka_taxi \
          --project=${USER_GCP_PROJECT} \
          --region=${USER_GCP_REGION} \
          --topic beam-runnerv2 \
          --bootstrap_servers ${BOOTSTRAP_SERVERS} \
          --bq_dataset ${KAFKA_TAXI_DF_DATASET} \
          --runner DataflowRunner \
          --num_workers 5 \
          --temp_location=${USER_GCS_BUCKET}/temp/ \
          --with_metadata \
          --beam_services="{\"sdks:java:io:expansion-service:shadowJar\": \"${KAFKA_EXPANSION_SERVICE_JAR}\"}" \
          --sdk_location apache-beam-${RELEASE_VER}.tar.gz || true
    - name: Checking executions results
      run: |
        bq head -n 10 ${KAFKA_TAXI_DF_DATASET}.xlang_kafka_taxi
    # Both cleanup steps run even when an earlier step failed.
    - name: Remove BigQuery Dataset
      if: always()
      run: |
        bq rm -f ${KAFKA_TAXI_DF_DATASET}.xlang_kafka_taxi
        bq rm -f ${KAFKA_TAXI_DF_DATASET}
    - name: Delete Kafka Cluster
      if: always()
      run: gcloud container clusters delete --project=${USER_GCP_PROJECT} --region=${USER_GCP_REGION} --async -q $KAFKA_CLUSTER_NAME
# Generates a unique Pub/Sub topic name, exposes it as the job output `name`
# for the dependent jobs, and creates the topic.
generate_shared_pubsub:
  outputs:
    name: ${{ steps.generate_pubsub_name.outputs.pubsub }}
  runs-on: [self-hosted,ubuntu-20.04]
  if: ${{github.event.inputs.RUN_DIRECT_RUNNER_TESTS == 'true' || github.event.inputs.RUN_DATAFLOW_RUNNER_TESTS== 'true'}}
  steps:
    - name: Sending PubSub name to env
      run: |
        echo "SHARED_PUBSUB_TOPIC=leader_board-${GITHUB_ACTOR}-python-topic-$(date +%m%d)_$RANDOM" >> $GITHUB_ENV
    - id: generate_pubsub_name
      name: Publishing PubSub name as step output
      run: |
        # The `::set-output` workflow command is deprecated; step outputs are
        # written to the GITHUB_OUTPUT file instead.
        echo "pubsub=$SHARED_PUBSUB_TOPIC" >> "$GITHUB_OUTPUT"
    - name: Creating Pub Sub Topics
      run: gcloud pubsub topics create --project=${USER_GCP_PROJECT} ${SHARED_PUBSUB_TOPIC}
# Generates the Beam Java examples project from the Maven archetype and runs
# its game Injector against the shared Pub/Sub topic for up to 50 minutes.
java_injector:
  runs-on: [self-hosted,ubuntu-20.04]
  if: ${{github.event.inputs.RUN_DIRECT_RUNNER_TESTS == 'true' || github.event.inputs.RUN_DATAFLOW_RUNNER_TESTS== 'true'}}
  needs: generate_shared_pubsub
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{env.RC_TAG}}
    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.8'
    - name: Setting python env
      uses: ./.github/actions/common-rc-validation
      with:
        RELEASE_VER: ${{env.RELEASE_VER}}
        PYTHON_RC_DOWNLOAD_URL: ${{env.PYTHON_RC_DOWNLOAD_URL}}
    - name: Setup Maven Action
      uses: s4u/setup-maven-action@v1.2.1
      with:
        java-version: 11
    # Writes a Maven settings.xml whose default-active profile adds the RC
    # repository. A pre-existing settings.xml is moved to /tmp first.
    - name: Updating Settings
      run: |
        mkdir -p ~/.m2
        if [[ -f ~/.m2/settings.xml ]]; then
          mv ~/.m2/settings.xml /tmp/
        fi
        cat > ~/.m2/settings.xml <<EOF
        <settings>
          <profiles>
            <profile>
              <id>release-repo</id>
              <activation>
                <activeByDefault>true</activeByDefault>
              </activation>
              <repositories>
                <repository>
                  <id>Release ${RELEASE_VER} RC${{github.event.inputs.RC_NUM}}</id>
                  <name>Release ${RELEASE_VER} RC${{github.event.inputs.RC_NUM}}</name>
                  <url>${APACHE_CONTENTS_REPO}</url>
                </repository>
              </repositories>
            </profile>
          </profiles>
        </settings>
        EOF
        cat ~/.m2/settings.xml
    - name: Generate Maven archetype
      run: |
        # Use the version under validation rather than a pinned, already
        # released one, so the injector is built from the RC artifacts.
        mvn archetype:generate \
          -DarchetypeGroupId=org.apache.beam \
          -DarchetypeArtifactId=beam-sdks-java-maven-archetypes-examples \
          -DarchetypeVersion=${RELEASE_VER} \
          -DgroupId=org.example \
          -DartifactId=word-count-beam \
          -Dversion="0.1" \
          -Dpackage=org.apache.beam.examples \
          -DinteractiveMode=false \
          -DarchetypeCatalog=internal
    # `|| true` keeps the step green whatever exit status the timed-out
    # injector reports.
    - name: Running Pub/Sub Java injector
      run: |
        ls
        cd word-count-beam
        timeout --preserve-status 50m mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.complete.game.injector.Injector -Dexec.args="${USER_GCP_PROJECT} ${{needs.generate_shared_pubsub.outputs.name}} none" || true
# Leaderboard validation labelled DirectRunner (the command passes no --runner
# flag): reads the shared Pub/Sub topic for up to 5 minutes, writes to a
# scratch BigQuery dataset, prints rows from both tables, then deletes it.
direct_runner_leaderboard:
  if: github.event.inputs.RUN_DIRECT_RUNNER_TESTS == 'true'
  needs: generate_shared_pubsub
  runs-on:
    - self-hosted
    - ubuntu-20.04
  strategy:
    matrix:
      py_version:
        - 3.8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ env.RC_TAG }}

    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.py_version }}

    - name: Setting python env
      uses: ./.github/actions/common-rc-validation
      with:
        RELEASE_VER: ${{ env.RELEASE_VER }}
        PYTHON_RC_DOWNLOAD_URL: ${{ env.PYTHON_RC_DOWNLOAD_URL }}

    # The dataset name is written to GITHUB_ENV for the steps below.
    - name: Exporting leaderboard Dataset Name
      run: |
        echo "LEADERBOARD_DIRECT_DATASET=${GITHUB_ACTOR}_python_validations_$(date +%m%d)_$RANDOM" >> $GITHUB_ENV

    - name: Creating Dataset
      run: |
        bq mk --project_id=${USER_GCP_PROJECT} ${LEADERBOARD_DIRECT_DATASET}

    # `|| true` keeps the step green whatever exit status the timed-out
    # pipeline reports.
    - name: Starting Leaderboard with DirectRunner
      run: |
        echo '*****************************************************';
        echo '* Running Python Leaderboard with DirectRunner';
        echo '*****************************************************';
        timeout --preserve-status 5m python -m apache_beam.examples.complete.game.leader_board \
          --project=${USER_GCP_PROJECT} \
          --topic projects/${USER_GCP_PROJECT}/topics/${{needs.generate_shared_pubsub.outputs.name}} \
          --dataset ${LEADERBOARD_DIRECT_DATASET} || true

    - name: Checking results
      run: |
        bq head -n 10 ${LEADERBOARD_DIRECT_DATASET}.leader_board_users
        bq head -n 10 ${LEADERBOARD_DIRECT_DATASET}.leader_board_teams

    # Cleanup runs even when an earlier step failed.
    - name: Removing BigQuery Dataset
      if: always()
      run: |
        bq rm -f ${LEADERBOARD_DIRECT_DATASET}.leader_board_users
        bq rm -f ${LEADERBOARD_DIRECT_DATASET}.leader_board_teams
        bq rm -f $LEADERBOARD_DIRECT_DATASET
# Leaderboard validation on DataflowRunner: reads the shared Pub/Sub topic for
# up to 20 minutes, writes to a scratch BigQuery dataset, prints rows from both
# tables, then deletes the dataset.
dataflow_runner_leaderboard:
  if: github.event.inputs.RUN_DATAFLOW_RUNNER_TESTS=='true'
  needs:
    - generate_shared_pubsub
  runs-on:
    - self-hosted
    - ubuntu-20.04
  strategy:
    matrix:
      py_version:
        - 3.8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ env.RC_TAG }}

    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.py_version }}

    - name: Setting python env
      uses: ./.github/actions/common-rc-validation
      with:
        RELEASE_VER: ${{ env.RELEASE_VER }}
        PYTHON_RC_DOWNLOAD_URL: ${{ env.PYTHON_RC_DOWNLOAD_URL }}

    # The dataset name is written to GITHUB_ENV for the steps below.
    - name: Exporting Dataflow Dataset Name
      run: |
        echo "LEADERBOARD_DF_DATASET=${GITHUB_ACTOR}_python_validations_$(date +%m%d)_$RANDOM" >> $GITHUB_ENV

    - name: Creating Dataset
      run: |
        bq mk --project_id=${USER_GCP_PROJECT} ${LEADERBOARD_DF_DATASET}

    # `|| true` keeps the step green whatever exit status the timed-out
    # pipeline reports.
    - name: Starting Leaderboard with Dataflow
      run: |
        echo '*****************************************************';
        echo '* Running Python Leaderboard with DataflowRunner';
        echo '*****************************************************';
        timeout --preserve-status 20m python -m apache_beam.examples.complete.game.leader_board \
          --project=${USER_GCP_PROJECT} \
          --region=${USER_GCP_REGION} \
          --topic projects/${USER_GCP_PROJECT}/topics/${{needs.generate_shared_pubsub.outputs.name}} \
          --dataset ${LEADERBOARD_DF_DATASET} \
          --runner DataflowRunner \
          --temp_location=${USER_GCS_BUCKET}/temp/ \
          --sdk_location apache-beam-${RELEASE_VER}.tar.gz || true

    - name: Checking results
      run: |
        bq head -n 10 ${LEADERBOARD_DF_DATASET}.leader_board_users
        bq head -n 10 ${LEADERBOARD_DF_DATASET}.leader_board_teams

    # Cleanup runs even when an earlier step failed.
    - name: Removing BigQuery Dataset
      if: always()
      run: |
        bq rm -f ${LEADERBOARD_DF_DATASET}.leader_board_users
        bq rm -f ${LEADERBOARD_DF_DATASET}.leader_board_teams
        bq rm -f $LEADERBOARD_DF_DATASET
# GameStats validation labelled DirectRunner (the command passes no --runner
# flag): reads the shared Pub/Sub topic for up to 40 minutes, writes to a
# scratch BigQuery dataset, prints rows from both tables, then deletes it.
direct_runner_gamestats:
  if: github.event.inputs.RUN_DIRECT_RUNNER_TESTS == 'true'
  needs:
    - generate_shared_pubsub
  runs-on:
    - self-hosted
    - ubuntu-20.04
  strategy:
    matrix:
      py_version:
        - 3.8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ env.RC_TAG }}

    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.py_version }}

    - name: Setting python env
      uses: ./.github/actions/common-rc-validation
      with:
        RELEASE_VER: ${{ env.RELEASE_VER }}
        PYTHON_RC_DOWNLOAD_URL: ${{ env.PYTHON_RC_DOWNLOAD_URL }}

    # The dataset name is written to GITHUB_ENV for the steps below.
    - name: Exporting Gamestates Direct Dataset Name
      run: |
        echo "GAMESTATS_DIRECT_DATASET=${GITHUB_ACTOR}_python_validations_$(date +%m%d)_$RANDOM" >> $GITHUB_ENV

    - name: Creating Dataset
      run: |
        bq mk --project_id=${USER_GCP_PROJECT} ${GAMESTATS_DIRECT_DATASET}

    # `|| true` keeps the step green whatever exit status the timed-out
    # pipeline reports.
    - name: Starting Gamestats with DirectRunner
      run: |
        echo '*****************************************************';
        echo '* Running GameStats with DirectRunner';
        echo '*****************************************************';
        timeout --preserve-status 40m python -m apache_beam.examples.complete.game.game_stats \
          --project=${USER_GCP_PROJECT} \
          --topic projects/${USER_GCP_PROJECT}/topics/${{needs.generate_shared_pubsub.outputs.name}} \
          --dataset ${GAMESTATS_DIRECT_DATASET} \
          --fixed_window_duration ${FIXED_WINDOW_DURATION} || true

    - name: Checking Results
      run: |
        bq head -n 10 ${GAMESTATS_DIRECT_DATASET}.game_stats_teams
        bq head -n 10 ${GAMESTATS_DIRECT_DATASET}.game_stats_sessions

    # Cleanup runs even when an earlier step failed.
    - name: Removing BigQuery Dataset
      if: always()
      run: |
        bq rm -f ${GAMESTATS_DIRECT_DATASET}.game_stats_sessions
        bq rm -f ${GAMESTATS_DIRECT_DATASET}.game_stats_teams
        bq rm -f $GAMESTATS_DIRECT_DATASET
# GameStats validation on DataflowRunner: reads the shared Pub/Sub topic for up
# to 45 minutes, writes to a scratch BigQuery dataset, prints rows from both
# tables, then deletes the dataset.
dataflow_runner_gamestats:
  runs-on: [self-hosted,ubuntu-20.04]
  if: ${{github.event.inputs.RUN_DATAFLOW_RUNNER_TESTS=='true'}}
  strategy:
    matrix:
      # Quoted so the version is always treated as a string, never a float.
      py_version: ['3.8']
  needs: [generate_shared_pubsub]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{env.RC_TAG}}
    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{matrix.py_version}}
    - name: Setting python env
      uses: ./.github/actions/common-rc-validation
      with:
        RELEASE_VER: ${{env.RELEASE_VER}}
        PYTHON_RC_DOWNLOAD_URL: ${{env.PYTHON_RC_DOWNLOAD_URL}}
    # Step names below say "Dataflow": the previous labels were copied from the
    # DirectRunner job and mislabelled this job's log sections.
    - name: Exporting Gamestats Dataflow Dataset Name
      run: echo "GAMESTATS_DF_DATASET=${GITHUB_ACTOR}_python_validations_$(date +%m%d)_$RANDOM" >> $GITHUB_ENV
    - name: Creating Dataset
      run: bq mk --project_id=${USER_GCP_PROJECT} ${GAMESTATS_DF_DATASET}
    # `|| true` keeps the step green whatever exit status the timed-out
    # pipeline reports.
    - name: Starting Gamestats with DataflowRunner
      run: |
        echo '*****************************************************';
        echo '* Running GameStats with DataflowRunner';
        echo '*****************************************************';
        timeout --preserve-status 45m python -m apache_beam.examples.complete.game.game_stats \
          --project=${USER_GCP_PROJECT} \
          --region=${USER_GCP_REGION} \
          --topic projects/${USER_GCP_PROJECT}/topics/${{needs.generate_shared_pubsub.outputs.name}} \
          --dataset ${GAMESTATS_DF_DATASET} \
          --runner DataflowRunner \
          --temp_location=${USER_GCS_BUCKET}/temp/ \
          --sdk_location apache-beam-${RELEASE_VER}.tar.gz \
          --fixed_window_duration ${FIXED_WINDOW_DURATION} || true
    - name: Checking Results
      run: |
        bq head -n 10 ${GAMESTATS_DF_DATASET}.game_stats_teams
        bq head -n 10 ${GAMESTATS_DF_DATASET}.game_stats_sessions
    # Cleanup runs even when an earlier step failed.
    - name: Removing BigQuery Dataset
      if: always()
      run: |
        bq rm -f ${GAMESTATS_DF_DATASET}.game_stats_teams
        bq rm -f ${GAMESTATS_DF_DATASET}.game_stats_sessions
        bq rm -f $GAMESTATS_DF_DATASET
# Deletes the shared Pub/Sub topic once every job that uses it has finished.
remove_shared_pubsub:
  runs-on: [self-hosted,ubuntu-20.04]
  # Wait for the injector and all four consumers so the topic is not deleted
  # while a pipeline is still reading from it. Skipped or failed jobs do not
  # block this job because of always() below.
  needs:
    - generate_shared_pubsub
    - java_injector
    - direct_runner_leaderboard
    - dataflow_runner_leaderboard
    - direct_runner_gamestats
    - dataflow_runner_gamestats
  # Run regardless of test results, but only when the producer job succeeded
  # (topic creation is its last step); otherwise there is nothing to delete
  # and the gcloud command would fail on an empty topic name.
  if: ${{ always() && needs.generate_shared_pubsub.result == 'success' }}
  steps:
    - name: Deleting Shared Pub Sub
      run: gcloud pubsub topics delete --project=${USER_GCP_PROJECT} ${{needs.generate_shared_pubsub.outputs.name}}