[pulsarbot] Handle bug in "/pulsarbot rerun-failure-checks" that reran obsolete jobs (#33)
- only the most recent run of each workflow should be considered when looking for failed or cancelled runs to rerun
diff --git a/pulsarbot/README.md b/pulsarbot/README.md
index 6ab1710..92af307 100644
--- a/pulsarbot/README.md
+++ b/pulsarbot/README.md
@@ -9,4 +9,14 @@
- `/pulsarbot run-failure-checks`: Run all the failed checks.
- `/pulsarbot rerun-failure-checks`: Rerun all the failed checks. Same as `/pulsarbot run-failure-checks`.
- `/pulsarbot run <check-name>`: Run a specified check only if the check is failed.
-- `/pulsarbot rerun <check-name>`: Same as `/pulsarbot run <check-name>`
\ No newline at end of file
+- `/pulsarbot rerun <check-name>`: Same as `/pulsarbot run <check-name>`
+
+
+### Testing changes to `entrypoint.sh` script
+
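+You can test modifications to the `entrypoint.sh` script locally with the `test_pulsarbot.sh` script, which builds the Docker image and runs it against a generated test event.
+
+Usage (pass a GitHub token in `GITHUB_TOKEN` and the pull request number as the first argument):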
+```bash
+GITHUB_TOKEN=your_token_here ./test_pulsarbot.sh PR_NUMBER_HERE
+```
diff --git a/pulsarbot/entrypoint.sh b/pulsarbot/entrypoint.sh
index fa9d36b..065bb00 100755
--- a/pulsarbot/entrypoint.sh
+++ b/pulsarbot/entrypoint.sh
@@ -1,8 +1,10 @@
#!/bin/bash
-
+# Test mode (TESTMODE=1): trace every executed command and dump the event
+# payload so a local run can be debugged.
+# NOTE(review): `set -x` echoes full command lines; confirm that github_client
+# does not place the token on one before sharing trace output.
+if [[ "${TESTMODE}" == 1 ]]; then
+  set -x
+  cat "${GITHUB_EVENT_PATH}"
+fi
set -e
-cat ${GITHUB_EVENT_PATH}
COMMENT_BODY=$(jq -r '.comment.body' "${GITHUB_EVENT_PATH}")
BOT_COMMAND_PREFIX="/pulsarbot"
@@ -13,7 +15,6 @@
exit
fi
-
read -r -a commands <<< "${COMMENT_BODY}"
BOT_COMMAND=${commands[1]}
CHECK_NAME=""
@@ -38,8 +39,8 @@
PR_NUM=$(jq -r '.issue.number' "${GITHUB_EVENT_PATH}")
function github_get() {
- path="$1"
- github_client "https://api.github.com/repos/${BOT_TARGET_REPOSITORY}${path}"
+ local urlpath="$1" # API path appended directly after the repository name, e.g. "/actions/runs?..."
+ github_client "https://api.github.com/repos/${BOT_TARGET_REPOSITORY}${urlpath}"
}
function github_client() {
@@ -51,24 +52,64 @@
HEAD_SHA=$(printf "%s" "${PR_JSON}" | jq -r .head.sha)
PR_BRANCH=$(printf "%s" "${PR_JSON}" | jq -r .head.ref)
PR_USER=$(printf "%s" "${PR_JSON}" | jq -r .head.user.login)
+PR_HTML_URL=$(printf "%s" "${PR_JSON}" | jq -r .html_url) # html_url field of the pull request JSON, shown in the log line below
+
+echo "Handling pulsarbot command for PR #${PR_NUM} ${PR_HTML_URL}"
function get_runs() {
- status="${1:-failure}"
+ local page="${1:-1}" # results page to request; defaults to page 1
# API reference https://docs.github.com/en/rest/reference/actions#list-workflow-runs-for-a-repository
- github_get "/actions/runs?actor=${PR_USER}&branch=${PR_BRANCH}&status=${status}&per_page=100" | jq -r --arg head_sha "${HEAD_SHA}" '.workflow_runs[] | select(.head_sha==$head_sha) | .url'
+ github_get "/actions/runs?actor=${PR_USER}&branch=${PR_BRANCH}&page=${page}&per_page=100" \
+ | jq -r --arg head_sha "${HEAD_SHA}" \
+ '.workflow_runs[] | select(.head_sha==$head_sha) | [.workflow_id,.created_at,.conclusion // .status,.url,.name,.html_url] | @csv' # one CSV row per run whose head_sha equals HEAD_SHA; the third column is the conclusion, or the status when the conclusion is null
}
-# find the failures
-FAILED_URLS=$(get_runs failure)
-CANCELLED_URLS=$(get_runs cancelled)
-for url in $FAILED_URLS $CANCELLED_URLS; do
- name=$(github_client "$url"|jq -r '.name')
+# Keep only the last row of each workflow from the CSV listing on stdin, so
+# that old attempts are never restarted.
+# Input:  CSV rows whose first field is the workflow id, already sorted so that
+#         rows of the same workflow are adjacent with the newest attempt last.
+# Output: exactly one row per workflow id (the last one of its group).
+function filter_oldruns() {
+  awk -F, '
+    # the workflow id changed: the remembered row closed the previous group
+    NR > 1 && $1 != last_id { print last_row }
+    { last_id = $1; last_row = $0 }
+    # flush the final group; NR is 0 when the input was empty
+    END { if (NR > 0) print last_row }
+  '
+}
+
+# Collect the CSV rows of every results page returned by get_runs and print
+# them sorted. Paging stops at the first page that yields no rows.
+# The sort is byte-wise (LC_ALL=C) so rows with the same workflow id stay
+# adjacent and are ordered by their remaining columns (created_at first),
+# whatever the caller's locale is.
+# Outputs: the sorted rows on stdout; nothing when no rows were found.
+function get_all_runs() {
+  local page=1
+  local csv
+  local all_rows=""
+  while true; do
+    # a failed request is handled like an empty page and ends the paging
+    csv="$(get_runs "${page}")" || break
+    if [[ -z "${csv}" ]]; then
+      break
+    fi
+    # command substitution strips the trailing newline; put it back
+    all_rows+="${csv}"$'\n'
+    page=$((page + 1))
+  done
+  if [[ -n "${all_rows}" ]]; then
+    printf '%s' "${all_rows}" | LC_ALL=C sort
+  fi
+}
+
+# Print one "url<TAB>name<TAB>html_url" line for every run kept by
+# filter_oldruns whose third column is "failure" or "cancelled".
+# NOTE: rows are split on every comma after the double quotes are stripped, so
+# a workflow name that itself contains a comma would be split across fields.
+function find_failed_or_cancelled() {
+  get_all_runs \
+    | filter_oldruns \
+    | awk -F, '
+        { gsub(/"/, "") }
+        $3 == "failure" || $3 == "cancelled" { print $4 "\t" $5 "\t" $6 }
+      '
+}
+
+# Allocate a dedicated file descriptor for the failed/cancelled listing; the
+# loop reads from it instead of stdin.
+exec {failures_fd}< <(find_failed_or_cancelled)
+
+foundjobs=0
+# handle failures: each line holds the run's API url, workflow name and html url
+while IFS=$'\t' read -r url name html_url <&${failures_fd}; do
if [[ "${CHECK_NAME}" == "_all" || "${name}" == *"${CHECK_NAME}"* ]]; then
- echo "rerun-failed-jobs for '${name}' ($url)"
+    echo "rerun-failed-jobs for '${name}'. Follow progress at $html_url"
# use https://docs.github.com/en/rest/reference/actions#re-run-failed-jobs-from-a-workflow-run
# to rerun only the failed jobs
github_client -X POST "${url}/rerun-failed-jobs"
+    # plain assignment on purpose: ((foundjobs++)) returns status 1 while the
+    # old value is 0, which would abort the script under `set -e`
+    foundjobs=$((foundjobs + 1))
else
- echo "Expect ${CHECK_NAME}, skipping build job '${name}' ($url)"
+    echo "Expect ${CHECK_NAME}, skipping build job '${name}' ($html_url)"
fi
done
+# the listing has been consumed; release the descriptor
+exec {failures_fd}<&-
+
+if [[ $foundjobs == 0 ]]; then
+  echo >&2 "Cannot find any failed workflow runs in PR #${PR_NUM}. Re-running can only target completed workflows."
+fi
diff --git a/pulsarbot/test_pulsarbot.sh b/pulsarbot/test_pulsarbot.sh
new file mode 100755
index 0000000..07799af
--- /dev/null
+++ b/pulsarbot/test_pulsarbot.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Local test harness for entrypoint.sh: writes a minimal issue-comment event,
+# builds the pulsarbot Docker image from the current directory and runs it.
+#
+# Usage: GITHUB_TOKEN=<token> ./test_pulsarbot.sh [PR_NUMBER]
+# Optional environment: COMMENT_BODY (bot command to simulate), TESTMODE (default 1).
+PRNUM=${1:-99999}
+echo "Using PRNUM=$PRNUM"
+# mktemp picks an unpredictable name; the EXIT trap removes the file on every exit path
+event_file=$(mktemp /tmp/testevent.json.XXXXXX) || exit 1
+trap 'rm -f -- "${event_file}"' EXIT
+# mktemp creates the file with mode 0600; keep it readable in case the container runs as another user
+chmod 0644 "${event_file}"
+cat > "${event_file}" <<EOF
+{
+  "comment": {
+    "body": "${COMMENT_BODY:-"/pulsarbot rerun-failure-checks"}"
+  },
+  "issue": {
+    "number": $PRNUM
+  }
+}
+EOF
+echo "Building docker image..."
+docker build -t pulsarbot . || exit 1
+# last command: its exit status becomes the exit status of this script
+docker run -v "${event_file}:/tmp/testevent.json" -e TESTMODE="${TESTMODE:-1}" -e GITHUB_TOKEN -e GITHUB_EVENT_PATH=/tmp/testevent.json pulsarbot