blob: 0ecdf76dd01419d2fb75c68207f8df3379882ee3 [file] [log] [blame]
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import argparse
import re
import datetime
import json
import sys
import textwrap
from pathlib import Path
from typing import List
# Hackery to enable importing of utils from ci/scripts/jenkins: REPO_ROOT is
# the fourth ancestor directory of this (resolved) file, and the jenkins
# scripts directory under it is appended to sys.path *before* the git_utils
# import below so that import can be resolved.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
sys.path.append(str(REPO_ROOT / "ci" / "scripts" / "jenkins"))
from git_utils import git, parse_remote
# strptime() format used for the timestamp strings in the PR data and for the
# --now command line override (e.g. "2022-01-31T12:00:00Z"). It has no %z
# directive, so the parsed datetimes are naive.
GIT_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
def prs_query(user: str, repo: str, cursor: str = None):
    """
    Build the GraphQL query text for one page of open pull requests.

    The query asks for the last 10 open pull requests of ``user``/``repo``
    together with their timestamps, draft flag, author, reviews (including the
    comments attached to each review) and standalone comments. When ``cursor``
    is given, a ``before:"<cursor>"`` argument is added so that the page ending
    just before that cursor is requested instead.

    Returns the query as a string.
    """
    # The same four timestamp fields are requested on every object
    timestamp_fields = "createdAt updatedAt lastEditedAt publishedAt"
    # Extra pagination argument, empty for the first page
    page_filter = "" if cursor is None else f', before:"{cursor}"'
    return f"""
{{
repository(name: "{repo}", owner: "{user}") {{
pullRequests(states: [OPEN], last: 10{page_filter}) {{
edges {{
cursor
}}
nodes {{
number
url
body
{timestamp_fields}
isDraft
author {{
login
}}
reviews(last:100) {{
nodes {{
{timestamp_fields}
bodyText
author {{ login }}
comments(last:100) {{
nodes {{
{timestamp_fields}
bodyText
}}
}}
}}
}}
comments(last:100) {{
nodes {{
authorAssociation
bodyText
{timestamp_fields}
author {{
login
}}
}}
}}
}}
}}
}}
}}
"""
def find_reviewers(body: str) -> List[str]:
    """
    Extract the user names mentioned in ``cc @user1 @user2 ...`` runs of text.

    Every occurrence of ``cc`` followed by one or more `` @name`` mentions is
    considered; names may contain ASCII letters, digits and ``-``. Mentions
    that are not part of a ``cc`` run are ignored.

    Returns the distinct user names (without the leading ``@``) in order of
    first appearance. An empty or missing body yields an empty list.
    """
    if not body:
        return []

    handles = []
    cc_runs = re.findall(r"(cc( @[-A-Za-z0-9]+)+)", body, flags=re.MULTILINE)
    for full_run, _last_mention in cc_runs:
        # Pull the names straight out of the matched run. Removing the "cc "
        # text with str.replace() would also eat the tail of any user name
        # that itself ends in "cc" (e.g. "cc @rcc @bob" -> "r", "bob").
        handles.extend(re.findall(r"@([-A-Za-z0-9]+)", full_run))

    # dict.fromkeys() de-duplicates while keeping first-seen order, so the
    # result is deterministic
    return list(dict.fromkeys(handles))
def check_pr(pr, wait_time, now):
    """
    Decide whether the reviewers of a pull request should be pinged.

    ``pr`` is a dict with the keys requested by ``prs_query`` (timestamps,
    ``author``, ``body``, ``url``, ``number``, ``reviews`` and ``comments``).
    The most recent timestamp found on the PR itself, its reviews, the
    comments attached to those reviews and its standalone comments is treated
    as the last action. ``wait_time`` is a ``datetime.timedelta`` and ``now``
    a ``datetime.datetime`` to measure against.

    Returns the list of reviewer logins (everyone cc'ed in the PR body or in a
    comment plus everyone who left a review, minus the PR author) when more
    than ``wait_time`` has passed since the last action, otherwise ``None``.
    """
    last_action = None
    author = pr["author"]["login"]

    def update_last(new_time, description):
        # Remember the newest (time, description) pair seen so far
        nonlocal last_action
        if new_time is None:
            print(f" time not found: {description}")
            return
        if isinstance(new_time, str):
            new_time = datetime.datetime.strptime(new_time, GIT_DATE_FORMAT)
        if last_action is None or new_time > last_action[0]:
            last_action = (new_time, description)

    def check_obj(obj, name):
        # Feed each timestamp of obj to update_last, labelled with the key it
        # actually came from
        for key in ("publishedAt", "updatedAt", "lastEditedAt", "createdAt"):
            update_last(obj[key], f"{name} {key}: {obj}")

    check_obj(pr, "pr")

    # GitHub counts comments left as part of a review separately from
    # standalone comments
    reviews = pr["reviews"]["nodes"]
    review_comments = []
    for review in reviews:
        review_comments += review["comments"]["nodes"]
        check_obj(review, "review")

    # Collate all comments
    comments = pr["comments"]["nodes"] + review_comments

    # Find the last date of any comment
    for comment in comments:
        check_obj(comment, "comment")

    if last_action is None:
        # Every timestamp was null, so there is nothing to measure against
        print(f" Not pinging PR {pr['number']} since no timestamps were found on it")
        return None

    time_since_last_action = now - last_action[0]

    # Find reviewers in the PR's body
    pr_body_reviewers = find_reviewers(pr["body"])

    # Pull out reviewers from any cc @... text in a comment
    cc_reviewers = [r for c in comments for r in find_reviewers(c["bodyText"])]

    # Anyone that has left a review as a reviewer (this may include the PR
    # author since their responses count as reviews). A review whose author
    # is null in the data is skipped instead of raising a TypeError.
    review_reviewers = [r["author"]["login"] for r in reviews if r["author"] is not None]

    # De-duplicate, drop the PR author and sort so the result is deterministic
    reviewers = sorted(set(cc_reviewers + review_reviewers + pr_body_reviewers) - {author})

    if time_since_last_action > wait_time:
        print(
            " Pinging reviewers",
            reviewers,
            "on",
            pr["url"],
            "since it has been",
            time_since_last_action,
            f"since anything happened on that PR (last action: {last_action[1]})",
        )
        return reviewers
    else:
        print(
            f" Not pinging PR {pr['number']} since it has been only {time_since_last_action} since the last action: {last_action[1]}"
        )
        return None
def make_ping_message(pr, reviewers):
    """
    Compose the comment text that pings ``reviewers`` on ``pr``.

    Each entry of ``reviewers`` and the login found at
    ``pr["author"]["login"]`` is turned into an ``@mention``. The reviewers
    are asked to review or address outstanding comments, and the author is
    asked to convert the PR to a draft if it is still a work in progress.

    Returns the message as a single string.
    """
    draft_docs_url = (
        "https://docs.github.com/en/pull-requests/collaborating-with-pull-requests"
        "/proposing-changes-to-your-work-with-pull-requests"
        "/changing-the-stage-of-a-pull-request#converting-a-pull-request-to-a-draft"
    )
    mentions = " ".join(f"@{login}" for login in reviewers)
    author_mention = f'@{pr["author"]["login"]}'
    return (
        f"It has been a while since this PR was updated, {mentions}"
        " please leave a review or address the outstanding comments. "
        f"{author_mention} if this PR is still a work in progress, "
        f"please [convert it to a draft]({draft_docs_url})"
        " until it is ready for review."
    )
if __name__ == "__main__":
    # Script entry point: parse the options, load the first page of open PRs
    # (from --pr-json or from GitHub), then walk page by page and comment on
    # every non-draft PR above the cutoff number that has been inactive for
    # longer than the configured wait time.
    description = "Comment on languishing issues and PRs"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--remote", default="origin", help="ssh remote to parse")
    parser.add_argument(
        "--wait-time-minutes",
        required=True,
        type=int,
        help="minutes of inactivity on a PR before its reviewers are pinged",
    )
    parser.add_argument(
        "--cutoff-pr-number",
        default=0,
        type=int,
        help="skip PRs whose number is less than or equal to this value",
    )
    parser.add_argument("--dry-run", action="store_true", help="don't update GitHub")
    parser.add_argument("--pr-json", help="(testing) data for testing to use instead of GitHub")
    parser.add_argument("--now", help="(testing) custom string for current time")
    args = parser.parse_args()

    remote = git(["config", "--get", f"remote.{args.remote}.url"])
    user, repo = parse_remote(remote)

    # argparse already converted both values to int (type=int)
    wait_time = datetime.timedelta(minutes=args.wait_time_minutes)
    cutoff_pr_number = args.cutoff_pr_number
    print(
        "Running with:\n"
        f" time cutoff: {wait_time}\n"
        f" number cutoff: {cutoff_pr_number}\n"
        f" dry run: {args.dry_run}\n"
        f" user/repo: {user}/{repo}\n",
        end="",
    )

    # NOTE(review): `github` is used below but is never defined or imported in
    # this file, so every non --pr-json / non --dry-run path raises NameError.
    # It presumably should be a GitHub client object built from git_utils --
    # confirm the intended constructor and create it here.
    if args.pr_json:
        page = json.loads(args.pr_json)
    else:
        page = github.graphql(prs_query(user, repo))

    if args.now:
        now = datetime.datetime.strptime(args.now, GIT_DATE_FORMAT)
    else:
        # Naive UTC "now" (the same value the deprecated utcnow() returned).
        # Timestamps parsed with GIT_DATE_FORMAT are naive too, so the two can
        # be subtracted from each other.
        now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    # Loop until all PRs have been checked
    while True:
        pull_requests = page["data"]["repository"]["pullRequests"]
        prs = pull_requests["nodes"]

        # Don't look at draft PRs at all
        prs_to_check = []
        for pr in prs:
            if pr["isDraft"]:
                print(f"Skipping #{pr['number']} since it's a draft")
            elif pr["number"] <= cutoff_pr_number:
                print(
                    f"Skipping #{pr['number']} since it's too old ({pr['number']} <= {cutoff_pr_number})"
                )
            else:
                print(f"Checking #{pr['number']}")
                prs_to_check.append(pr)

        print(f"Summary: Checking {len(prs_to_check)} of {len(prs)} fetched")

        # Ping reviewers on each PR in the response if necessary
        for pr in prs_to_check:
            print("Checking", pr["url"])
            reviewers = check_pr(pr, wait_time, now)
            if reviewers is not None:
                message = make_ping_message(pr, reviewers)
                if args.dry_run:
                    print(
                        f"Would have commented on #{pr['number']}:\n{textwrap.indent(message, prefix=' ')}"
                    )
                else:
                    # Keep the POST result in its own variable: the current
                    # page is still needed below to find the next cursor
                    response = github.post(f"issues/{pr['number']}/comments", {"body": message})
                    print(response)

        if args.pr_json:
            # Test data replaces GitHub entirely, so there is no next page
            break

        edges = pull_requests["edges"]
        if not edges:
            # No more results to check
            break

        # The query pages backwards (last: 10, before: cursor), so continue
        # from the first edge of the current page
        cursor = edges[0]["cursor"]
        page = github.graphql(prs_query(user, repo, cursor))