Introduce /bkbot Command to Control CI Workflow Runs via PR Comments (#4673)
* Introduce /bkbot Command to Control CI Workflow Runs via PR Comments
diff --git a/.github/workflows/ci-bkbot.yaml b/.github/workflows/ci-bkbot.yaml
new file mode 100644
index 0000000..961f6b2
--- /dev/null
+++ b/.github/workflows/ci-bkbot.yaml
@@ -0,0 +1,298 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Description:
+# This GitHub Actions workflow enables rerunning CI via PR/Issue comments using the /bkbot command.
+# Supported commands: /bkbot rerun [keyword]
+# - /bkbot rerun => Rerun the latest run of each workflow under the same head SHA, limited to runs with a conclusion of failure/cancelled/timed_out/skipped (entire run).
+# - /bkbot rerun <keyword> => Regardless of workflow/job status, fetch all jobs in the latest runs, match by name, and rerun each matching job.
+# Logging instructions:
+# - Jobs that are failed/cancelled/timed_out/skipped are scanned from all the latest workflow runs (including those in progress), thus jobs fail/skipped during progress can be captured.
+# Triggering condition: When a new comment is created containing /bkbot.
+
+# Workflow that reacts to '/bkbot' comments; all command handling lives in the inline script of its single job.
+name: BookKeeper Bot
+on:
+ # Fires for every newly created comment; the job-level `if` below narrows this down.
+ issue_comment:
+ types: [created]
+
+# Permissions granted to the GITHUB_TOKEN used by the script, which calls the Actions re-run and cancel APIs
+# (presumably the reason `actions: write` is requested).
+permissions:
+ actions: write
+ contents: read
+
+jobs:
+ bkbot:
+ runs-on: ubuntu-24.04
+ timeout-minutes: 10
+ # Pre-filter: skip the job unless the comment mentions '/bkbot'. The script itself re-checks that the
+ # comment starts with the command and that it was made on a pull request.
+ if: github.event_name == 'issue_comment' && contains(github.event.comment.body, '/bkbot')
+ steps:
+ - name: Execute bkbot command
+ uses: actions/github-script@v7
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ // Supported commands:
+ // - /bkbot rerun
+ // Reruns all completed workflows with conclusions of failure/timed_out/skipped/cancelled
+ // If workflow is still running, cannot rerun whole workflow, just suggest using "/bkbot rerun jobname"
+ // - /bkbot rerun jobname
+ // Matches job.name by keyword, reruns matching jobs (regardless of current state, failures are logged)
+ // - /bkbot stop or /bkbot cancel
+ // Cancels all still running (queued/in_progress) workflow runs associated with the current PR
+
+ const commentBody = context.payload.comment.body.trim();
+ const prefix = '/bkbot';
+ if (!commentBody.startsWith(prefix)) {
+ console.log('Not a bkbot command, skipping ...');
+ return;
+ }
+
+ if (!context.payload.issue || !context.payload.issue.pull_request) {
+ console.error('This comment is not on a Pull Request. bkbot only works on PRs.');
+ return;
+ }
+
+ const parts = commentBody.split(/\s+/);
+ const sub = (parts[1] || '').toLowerCase();
+ const arg = parts.length > 2 ? parts.slice(2).join(' ') : '';
+
+ const supported = ['rerun', 'stop', 'cancel'];
+ if (!supported.includes(sub)) {
+ console.log(`Unsupported command '${sub}'. Supported: '/bkbot rerun [jobName?]', '/bkbot stop', '/bkbot cancel'.`);
+ return;
+ }
+
+ const prNum = context.payload.issue.number;
+
+ // Get PR info
+ let pr;
+ try {
+ ({ data: pr } = await github.rest.pulls.get({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ pull_number: prNum
+ }));
+ } catch (e) {
+ console.error(`Failed to fetch PR #${prNum}: ${e.message}`);
+ return;
+ }
+
+ const headSha = pr.head.sha;
+ const prBranch = pr.head.ref;
+ const prUser = (pr.head && pr.head.user && pr.head.user.login) ? pr.head.user.login : pr.user.login;
+ const prUrl = pr.html_url;
+
+ console.log(`bkbot handling PR #${prNum} ${prUrl}`);
+ console.log(`PR branch='${prBranch}', headSha='${headSha}', author='${prUser}'`);
+ console.log(`Command parsed => sub='${sub}', arg='${arg || ''}'`);
+
+ // Fetch workflow runs in this repo triggered by this user on this branch, then filter by headSha
+ let page = 1;
+ const allRunsRaw = [];
+ while (true) {
+ const { data } = await github.rest.actions.listWorkflowRunsForRepo({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ actor: prUser,
+ branch: prBranch,
+ per_page: 100,
+ page
+ });
+ const wr = data.workflow_runs || [];
+ if (wr.length === 0) break;
+ allRunsRaw.push(...wr);
+ if (wr.length < 100) break;
+ page++;
+ }
+
+ const runsAtHead = allRunsRaw.filter(r => r.head_sha === headSha);
+ if (runsAtHead.length === 0) {
+ console.error(`No workflow runs found for head SHA ${headSha} on branch ${prBranch}.`);
+ return;
+ }
+
+ // Only keep the latest run for each workflow_id
+ runsAtHead.sort((a, b) => {
+ if (a.workflow_id !== b.workflow_id) return a.workflow_id - b.workflow_id;
+ return new Date(b.created_at) - new Date(a.created_at);
+ });
+ const latestRuns = [];
+ const seen = new Set();
+ for (const r of runsAtHead) {
+ if (!seen.has(r.workflow_id)) {
+ seen.add(r.workflow_id);
+ latestRuns.push(r);
+ }
+ }
+
+ function runKey(r) {
+ return `[run_id=${r.id}] ${r.name || '(unnamed)'} | status=${r.status} | conclusion=${r.conclusion || '-'} | ${r.html_url}`;
+ }
+
+ console.log('--- Latest workflow runs for this PR headSHA (one per workflow) ---');
+ for (const r of latestRuns) console.log('- ' + runKey(r));
+
+ // Utility: list all jobs in a run
+ async function listAllJobs(runId) {
+ let jobs = [];
+ let p = 1;
+ while (true) {
+ const { data } = await github.rest.actions.listJobsForWorkflowRun({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ run_id: runId,
+ per_page: 100,
+ page: p
+ });
+ const js = data.jobs || [];
+ if (js.length === 0) break;
+ jobs.push(...js);
+ if (js.length < 100) break;
+ p++;
+ }
+ return jobs;
+ }
+
+ // Utility: rerun a single job
+ async function rerunJob(job, run) {
+ try {
+ if (github.rest.actions.reRunJobForWorkflowRun) {
+ await github.rest.actions.reRunJobForWorkflowRun({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ job_id: job.id
+ });
+ } else {
+ await github.request('POST /repos/{owner}/{repo}/actions/jobs/{job_id}/rerun', {
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ job_id: job.id
+ });
+ }
+ console.log(`Re-ran job '${job.name}' (job_id=${job.id}) in run '${run.name}' | ${run.html_url}`);
+ return true;
+ } catch (e) {
+ console.log(`Failed to re-run job '${job.name}' (job_id=${job.id}) in run '${run.name}': ${e.message}`);
+ return false;
+ }
+ }
+
+ // Command 1: /bkbot rerun
+ if (sub === 'rerun' && !arg) {
+ const targetConclusions = new Set(['failure', 'timed_out', 'cancelled', 'skipped']);
+ let fullRerunCount = 0;
+ let skippedRunning = 0;
+ let skippedConclusion = 0;
+
+ console.log('Mode: full workflow re-run for completed runs with conclusions in [failure,timed_out,cancelled,skipped].');
+ for (const r of latestRuns) {
+ if (r.status !== 'completed') {
+ console.log(`Skip (still running) ${runKey(r)}. Cannot re-run whole workflow. Consider '/bkbot rerun <jobName>' for single job.`);
+ skippedRunning++;
+ continue;
+ }
+ if (!targetConclusions.has(r.conclusion)) {
+ console.log(`Skip (conclusion not eligible) ${runKey(r)}`);
+ skippedConclusion++;
+ continue;
+ }
+ try {
+ await github.rest.actions.reRunWorkflow({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ run_id: r.id
+ });
+ console.log(`Triggered full re-run for ${runKey(r)}`);
+ fullRerunCount++;
+ } catch (e) {
+ console.log(`Failed to trigger full re-run for ${runKey(r)}: ${e.message}`);
+ }
+ }
+
+ if (fullRerunCount === 0) {
+ console.error(`No eligible workflow runs to re-run. Skipped running=${skippedRunning}, skipped by conclusion=${skippedConclusion}.`);
+ } else {
+ console.log(`Finished. Triggered full re-run for ${fullRerunCount} workflow run(s). Skipped running=${skippedRunning}, skipped by conclusion=${skippedConclusion}.`);
+ }
+ return;
+ }
+
+ // Command 2: /bkbot rerun jobname
+ if (sub === 'rerun' && arg) {
+ const keyword = arg.trim();
+ console.log(`Mode: job-level re-run. keyword='${keyword}'`);
+
+ let matchedJobs = 0;
+ let successJobs = 0;
+
+ for (const r of latestRuns) {
+ let jobs = [];
+ try {
+ jobs = await listAllJobs(r.id);
+ } catch (e) {
+ console.log(`Failed to list jobs for ${runKey(r)}: ${e.message}`);
+ continue;
+ }
+ for (const j of jobs) {
+ if (j.name && j.name.includes(keyword)) {
+ matchedJobs++;
+ const ok = await rerunJob(j, r);
+ if (ok) successJobs++;
+ }
+ }
+ }
+
+ if (matchedJobs === 0) {
+ console.error(`No jobs matched keyword '${keyword}' among latest runs for this PR head.`);
+ } else {
+ console.log(`Finished. Matched ${matchedJobs} job(s); successfully requested re-run for ${successJobs} job(s).`);
+ }
+ return;
+ }
+
+ // Command 3: /bkbot stop or /bkbot cancel
+ if (sub === 'stop' || sub === 'cancel') {
+ console.log('Mode: cancel running workflow runs (queued/in_progress).');
+
+ let cancelCount = 0;
+ let alreadyCompleted = 0;
+
+ for (const r of latestRuns) {
+ if (r.status === 'completed') {
+ console.log(`Skip (already completed) ${runKey(r)}`);
+ alreadyCompleted++;
+ continue;
+ }
+ try {
+ await github.rest.actions.cancelWorkflowRun({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ run_id: r.id
+ });
+ console.log(`Cancel requested for ${runKey(r)}`);
+ cancelCount++;
+ } catch (e) {
+ console.log(`Failed to cancel ${runKey(r)}: ${e.message}`);
+ }
+ }
+
+ if (cancelCount === 0) {
+ console.error(`No running workflow runs to cancel. Already completed: ${alreadyCompleted}.`);
+ } else {
+ console.log(`Finished. Requested cancel for ${cancelCount} running workflow run(s). Already completed: ${alreadyCompleted}.`);
+ }
+ return;
+ }