# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import defaultdict
from io import BytesIO
import json
import logging
import os
import subprocess
import shlex
import sys
import tempfile
import textwrap
from typing import Any, BinaryIO, Callable, Dict, List, Optional

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.DEBUG)
logger.addHandler(handler)

ok = "✅"
err = "❌"


def get_env(key: str, fn: Callable[[str], Any] = str) -> Optional[Any]:
    value = os.getenv(key)
    if value is None:
        logger.debug(f"Could not find env {key}")
        return None
    else:
        logger.debug(f"Read env {key}: {value}")
        return fn(value)


def has_approval(reviews) -> bool:
    approved = False
    for review in reviews:
        if review.get("authorAssociation") not in ("MEMBER", "OWNER"):
            continue
        if review.get("state") == "APPROVED":
            approved = True
    return approved


def write_commit(io: BinaryIO, title: str, body: str):
    io.write(title.encode())
    io.write(b"\n\n")
    io.write(body.encode())
    io.flush()


def parse_trailers(title: str, body: str) -> Dict[str, List[str]]:
    trailers = defaultdict(list)

    with tempfile.NamedTemporaryFile() as fp:
        write_commit(fp, title, body)
        cmd = f"git interpret-trailers --trim-empty --parse {fp.name}"
        p = subprocess.run(shlex.split(cmd), capture_output=True)

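    # "git interpret-trailers --parse" prints only the recognized trailers, one per line,
    # in "Key: value" form, e.g. a hypothetical "Reviewers: Jane Doe <jane@example.com>".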
    for line in p.stdout.decode().splitlines():
        key, value = line.split(":", 1)
        trailers[key].append(value.strip())

    return trailers


def split_paragraphs(text: str):
    """
    Split the given text into paragraphs, yielding a (lines, markdown) tuple for each one. "lines" is the list
    of lines in the paragraph and "markdown" is a flag indicating whether the paragraph appears to contain markdown.

    If any line of a paragraph starts with a markdown character, we will assume the whole paragraph
    contains markdown.
    """
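    # For example, a hypothetical body such as
    #   "Fixes the fetcher.\n\n- add a test\n- fix the off-by-one\n"
    # yields (["Fixes the fetcher.\n"], False) followed by
    # (["- add a test\n", "- fix the off-by-one\n"], True).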
    lines = text.splitlines(keepends=True)
    paragraph = []
    markdown = False
    for line in lines:
        if line.strip() == "":
            if len(paragraph) > 0:
                yield paragraph, markdown
                # Re-bind rather than clear so the list we just yielded is not mutated.
                paragraph = []
                markdown = False
        else:
            if line[0] in ("#", "*", "-", "=") or line[0].isdigit():
                markdown = True
            if "```" in line:
                markdown = True
            paragraph.append(line)
    if paragraph:
        yield paragraph, markdown


if __name__ == "__main__":
    """
    This script performs some basic linting of our PR title and body. The PR number is read from the PR_NUMBER
    environment variable. Since this script is expected to run on a GitHub Actions runner, it assumes the "gh"
    CLI is installed.

    The STDOUT from this script is used as the status check message. It should not be too long. Use the logger for
    any necessary logging.

    Title checks:
    * Not too short (at least 15 characters)
    * Not too long (at most 120 characters)
    * Not truncated (ending with ...)
    * Starts with "KAFKA-", "MINOR", or "HOTFIX"

    Body checks:
    * Is not empty
    * Does not contain leftover PR template text
    * Has "Reviewers:" trailer if the PR is approved
    """
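    # Hypothetical local invocation (assumes an installed, authenticated "gh" CLI and a real PR number):
    #   PR_NUMBER=<pr-number> python <path-to-this-script>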

    pr_number = get_env("PR_NUMBER")
    if pr_number is None:
        logger.error("No PR_NUMBER environment variable was found, exiting.")
        exit(1)

    cmd = f"gh pr view {pr_number} --json 'title,body,reviews'"
    p = subprocess.run(shlex.split(cmd), capture_output=True)
    if p.returncode != 0:
        logger.error(f"GitHub CLI failed with exit code {p.returncode}.\nSTDOUT: {p.stdout.decode()}\nSTDERR: {p.stderr.decode()}")
        exit(1)

    gh_json = json.loads(p.stdout)
    title = gh_json["title"]
    body = gh_json["body"]
    reviews = gh_json["reviews"]
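    # Each review object is expected to carry "authorAssociation" (e.g. MEMBER or OWNER) and
    # "state" (e.g. APPROVED) fields, which has_approval relies on below.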

    checks = []  # list of (error, message) tuples, where error is 0 for a passing check and 1 for a failure

    def check(positive_assertion, ok_msg, err_msg):
        if positive_assertion:
            checks.append((0, f"{ok} {ok_msg}"))
        else:
            checks.append((1, f"{err} {err_msg}"))

    # Check title
    check(not title.endswith("..."), "Title is not truncated", "Title appears truncated (ends with ...)")
    check(len(title) >= 15, "Title is not too short", "Title is too short (under 15 characters)")
    check(len(title) <= 120, "Title is not too long", "Title is too long (over 120 characters)")
    ok_prefix = title.startswith(("KAFKA-", "MINOR", "HOTFIX"))
    check(ok_prefix, "Title has expected KAFKA/MINOR/HOTFIX prefix", "Title is missing KAFKA-XXXXX or MINOR/HOTFIX prefix")

    # Check body
    check(len(body) != 0, "Body is not empty", "Body is empty")
    check("Delete this text and replace" not in body, "PR template text not present", "PR template text should be removed")
    check("Committer Checklist" not in body, "Old PR template text not present", "Old PR template text should be removed")

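    # Re-wrap the body to 72 columns, the conventional width for git commit messages. Paragraphs that
    # look like markdown are wrapped line by line so their structure is preserved.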
    paragraph_iter = split_paragraphs(body)
    new_paragraphs = []
    for p, markdown in paragraph_iter:
        if markdown:
            # If a paragraph looks like it has markdown in it, wrap each line separately.
            new_lines = []
            for line in p:
                new_lines.append(textwrap.fill(line, width=72, break_long_words=False, break_on_hyphens=False, replace_whitespace=False))
            rewrapped_p = "\n".join(new_lines)
        else:
            indent = ""
            if len(p) > 0 and p[0].startswith("Reviewers:"):
                indent = " "
            rewrapped_p = textwrap.fill("".join(p), subsequent_indent=indent, width=72, break_long_words=False, break_on_hyphens=False, replace_whitespace=True)
        new_paragraphs.append(rewrapped_p + "\n")
    body = "\n".join(new_paragraphs)

    if get_env("GITHUB_ACTIONS"):
        with tempfile.NamedTemporaryFile() as fp:
            fp.write(body.encode())
            fp.flush()
            cmd = f"gh pr edit {pr_number} --body-file {fp.name}"
            p = subprocess.run(shlex.split(cmd), capture_output=True)
        if p.returncode != 0:
            logger.error(f"Could not update PR {pr_number}.\nSTDOUT: {p.stdout.decode()}\nSTDERR: {p.stderr.decode()}")
    else:
        logger.info(f"Not reformatting {pr_number} since this is not running on GitHub Actions.")

    # Check for Reviewers
    approved = has_approval(reviews)
    if approved:
        trailers = parse_trailers(title, body)
        reviewers_in_body = trailers.get("Reviewers", [])
        check(len(reviewers_in_body) > 0, "Found 'Reviewers' in commit body", "Pull Request is approved, but no 'Reviewers' found in commit body")
        if len(reviewers_in_body) > 0:
            for reviewer_in_body in reviewers_in_body:
                logger.debug(reviewer_in_body)

    logger.debug("Commit will look like:\n")
    logger.debug("<pre>")
    io = BytesIO()
    title += f" (#{pr_number})"
    write_commit(io, title, body)
    io.seek(0)
    logger.debug(io.read().decode())
    logger.debug("</pre>\n")

    logger.debug("Validation results:")
    for _, msg in checks:
        logger.debug(f"* {msg}")

    for failed, msg in checks:
        # Just output the first error for the status message. STDOUT becomes the status check message.
        if failed:
            print(msg)
            exit(1)

    logger.debug("No validation errors, PR format looks good!")
    print("PR format looks good!")
    exit(0)