#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import argparse
import csv
import os
import pickle
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List

# Make the repo's shared CI helper modules (git_utils, github_tag_teams) importable.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
sys.path.append(str(REPO_ROOT / "ci" / "scripts" / "jenkins"))
sys.path.append(str(REPO_ROOT / "ci" / "scripts" / "github"))

from git_utils import GitHubRepo
from github_tag_teams import tags_from_title

GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
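# The token is read eagerly at import time, so running without GITHUB_TOKEN set
# in the environment fails immediately with a KeyError.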
PRS_QUERY = """
query ($owner: String!, $name: String!, $after: String, $pageSize: Int!) {
repository(owner: $owner, name: $name) {
defaultBranchRef {
name
target {
... on Commit {
oid
history(after: $after, first: $pageSize) {
pageInfo {
hasNextPage
endCursor
}
nodes {
oid
committedDate
associatedPullRequests(first: 1) {
nodes {
number
additions
changedFiles
deletions
author {
login
}
title
body
}
}
}
}
}
}
}
}
}
"""


def append_and_save(items, file):
    """Append `items` to the pickled list stored at `file`, creating it if needed."""
    if not file.exists():
        data = []
    else:
        with open(file, "rb") as f:
            data = pickle.load(f)
    data += items
    with open(file, "wb") as f:
        pickle.dump(data, f)
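
# The cache file is a single pickled list; each element is one commit node from
# PRS_QUERY above (oid, committedDate, and up to one associated PR).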


def fetch_pr_data(args, cache, user, repo):
    """Page through the default branch's commit history, appending raw commit
    nodes (with their associated PR, if any) to the pickle cache."""
    github = GitHubRepo(user=user, repo=repo, token=GITHUB_TOKEN)

    if args.from_commit is None or args.to_commit is None:
        print("--from-commit and --to-commit must be specified if --skip-query is not used")
        sys.exit(1)

    i = 0
    page_size = 80
    # Seed the history cursor at the starting commit (see the cursor note above).
    cursor = f"{args.from_commit} {i}"

    while True:
        try:
            r = github.graphql(
                query=PRS_QUERY,
                variables={
                    "owner": user,
                    "name": repo,
                    "after": cursor,
                    "pageSize": page_size,
                },
            )
        except RuntimeError as e:
            print(f"{e}\nPlease check that the GITHUB_TOKEN environment variable is valid.")
            sys.exit(1)
        data = r["data"]["repository"]["defaultBranchRef"]["target"]["history"]
        results = data["nodes"]

        # Collect commits from this page until --to-commit is reached.
        to_add = []
        stop = False
        for node in results:
            if node["oid"] == args.to_commit:
                print(f"Found {node['oid']}, stopping")
                stop = True
                break
            to_add.append(node)

        oids = [node["oid"] for node in to_add]
        print(oids)
        append_and_save(to_add, cache)

        if stop or not data["pageInfo"]["hasNextPage"]:
            break
        cursor = data["pageInfo"]["endCursor"]

        print(i)
        i += page_size


def write_csv(
    filename: str, data: List[Dict[str, Any]], threshold_filter: Callable[[Dict[str, Any]], bool]
) -> None:
    """Flatten cached commit nodes into a CSV, one row per commit that has an
    associated PR, flagging PRs whose diff size passes the threshold."""
    with open(filename, "w", newline="") as csvfile:
        writer = csv.writer(csvfile, quotechar='"')
        writer.writerow(
            (
                "category",
                "subject",
                "date",
                "url",
                "author",
                "pr_title_tags",
                "pr_title",
                "additions",
                "deletions",
                "additions+deletions>threshold",
                "changed_files",
            )
        )
        for item in data:
            nodes = item["associatedPullRequests"]["nodes"]
            if len(nodes) == 0:
                continue
            pr = nodes[0]

            # Split comma-separated title tags (e.g. "[tag1, tag2]") into a flat,
            # lower-cased list; the first tag is treated as the category.
            tags = tags_from_title(pr["title"])
            actual_tags = []
            for t in tags:
                actual_tags += [x.strip() for x in t.split(",")]
            tags = [t.lower() for t in actual_tags]
            category = tags[0] if len(tags) > 0 else ""

            # Deleted accounts come back as a null author; mirror GitHub's "ghost" user.
            author = pr["author"] if pr["author"] else "ghost"
            author = author.get("login", "") if isinstance(author, dict) else author

            writer.writerow(
                (
                    category,
                    "n/a",
                    item["committedDate"],
                    f'https://github.com/apache/tvm/pull/{pr["number"]}',
                    author,
                    "/".join(tags),
                    pr["title"].replace(",", " "),
                    pr["additions"],
                    pr["deletions"],
                    1 if threshold_filter(pr) else 0,
                    pr["changedFiles"],
                )
            )

    print(f"{filename} generated!")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="List out commits with attached PRs since a certain commit"
    )
    parser.add_argument("--from-commit", help="commit to start checking PRs from")
    parser.add_argument("--to-commit", help="commit to stop checking PRs at")
    parser.add_argument(
        "--threshold",
        default=0,
        type=int,
        help="sum of additions + deletions for a PR to be considered large, such as 150",
    )
    parser.add_argument(
        "--skip-query", action="store_true", help="don't query GitHub and instead use cache file"
    )
    args = parser.parse_args()

    user = "apache"
    repo = "tvm"
    threshold = args.threshold
    cache = Path("out.pkl")

    if not args.skip_query:
        fetch_pr_data(args, cache, user, repo)

    with open(cache, "rb") as f:
        data = pickle.load(f)
    print(f"Found {len(data)} PRs")

    write_csv(
        filename="out_pr_gathered.csv",
        data=data,
        threshold_filter=lambda pr: pr["additions"] + pr["deletions"] > threshold,
    )
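
# Example invocation (the script name and commit SHAs are placeholders):
#   GITHUB_TOKEN=<token> python gather_prs.py \
#       --from-commit <newer sha> --to-commit <older sha> --threshold 150
# A follow-up run with --skip-query rebuilds the CSV from the existing out.pkl
# without hitting the GitHub API.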