#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

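"""Gather commits and their associated PRs between two commits on the default branch.

The script walks the GitHub GraphQL commit history for apache/tvm newest-first,
caches the raw results in a local pickle file, and writes a summary CSV
(out_pr_gathered.csv) for release-notes triage.

Example invocation (SHAs are placeholders; --from-commit is the newer commit,
since history is walked backwards from it):

    GITHUB_TOKEN=<token> python <this script> --from-commit <newer sha> --to-commit <older sha>
"""
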
import argparse
import os
import pickle
from pathlib import Path
import csv
import sys
from typing import Callable, Dict, List, Any

REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
sys.path.append(str(REPO_ROOT / "ci" / "scripts" / "jenkins"))
sys.path.append(str(REPO_ROOT / "ci" / "scripts" / "github"))

from git_utils import git, GitHubRepo
from github_tag_teams import tags_from_title


GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]


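# GraphQL query that pages through the default branch's commit history (newest
# first). Pagination is driven by `after`/`endCursor`, and for each commit only
# the first associated pull request is requested, since merged PRs normally have
# exactly one.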
PRS_QUERY = """
query ($owner: String!, $name: String!, $after: String, $pageSize: Int!) {
  repository(owner: $owner, name: $name) {
    defaultBranchRef {
      name
      target {
        ... on Commit {
          oid
          history(after: $after, first: $pageSize) {
            pageInfo {
              hasNextPage
              endCursor
            }
            nodes {
              oid
              committedDate
              associatedPullRequests(first: 1) {
                nodes {
                  number
                  additions
                  changedFiles
                  deletions
                  author {
                    login
                  }
                  title
                  body
                }
              }
            }
          }
        }
      }
    }
  }
}
"""


def append_and_save(items, file):
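    """Append `items` to the pickle file at `file`, creating the file if needed."""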
    if not file.exists():
        data = []
    else:
        with open(file, "rb") as f:
            data = pickle.load(f)

    data += items
    with open(file, "wb") as f:
        pickle.dump(data, f)


def fetch_pr_data(args, cache, user, repo):
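    """Page through the default branch's commit history starting at --from-commit
    until --to-commit is found, appending each page of commit/PR data to the
    pickle cache at `cache`."""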
    github = GitHubRepo(user=user, repo=repo, token=GITHUB_TOKEN)

    if args.from_commit is None or args.to_commit is None:
        print("--from-commit and --to-commit must be specified if --skip-query is not used")
        exit(1)

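    # GitHub's commit-history cursors have the (undocumented) form "<oid> <offset>";
    # building the initial cursor by hand starts the walk at --from-commit, and
    # later pages use the endCursor returned by the API.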
    i = 0
    page_size = 80
    cursor = f"{args.from_commit} {i}"

    while True:
        try:
            r = github.graphql(
                query=PRS_QUERY,
                variables={
                    "owner": user,
                    "name": repo,
                    "after": cursor,
                    "pageSize": page_size,
                },
            )
        except RuntimeError as e:
            print(f"{e}\nPlease check that the GITHUB_TOKEN environment variable is valid.")
            exit(1)
        data = r["data"]["repository"]["defaultBranchRef"]["target"]["history"]
        results = data["nodes"]

        to_add = []
        stop = False
        for node in results:
            if node["oid"] == args.to_commit:
                print(f"Found {node['oid']}, stopping")
                stop = True
                break
            else:
                to_add.append(node)

        oids = [node["oid"] for node in to_add]
        print(oids)
        append_and_save(to_add, cache)

        # Save the page before checking pageInfo so commits on the final page are
        # not dropped, then stop once --to-commit or the end of history is reached.
        if stop or not data["pageInfo"]["hasNextPage"]:
            break
        cursor = data["pageInfo"]["endCursor"]
        print(i)
        i += page_size


def write_csv(
    filename: str, data: List[Dict[str, Any]], threshold_filter: Callable[[Dict[str, Any]], bool]
) -> None:
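    """Write one CSV row per commit that has an associated PR.

    `threshold_filter` receives the PR dict and should return True for PRs considered
    "large"; the result is recorded in the `additions+deletions>threshold` column.
    """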
    with open(filename, "w", newline="") as csvfile:
        writer = csv.writer(csvfile, quotechar='"')
        writer.writerow(
            (
                "category",
                "subject",
                "date",
                "url",
                "author",
                "pr_title_tags",
                "pr_title",
                "additions",
                "deletions",
                "additions+deletions>threshold",
                "changed_files",
            )
        )
        for item in data:
            nodes = item["associatedPullRequests"]["nodes"]
            if len(nodes) == 0:
                continue
            pr = nodes[0]
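            # tags_from_title() extracts the bracketed "[...]" tags from the PR title.
            # A single bracket group may hold several comma-separated tags, so split
            # and lower-case them; the first tag becomes the CSV "category" column.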
            tags = tags_from_title(pr["title"])
            actual_tags = []
            for t in tags:
                items = [x.strip() for x in t.split(",")]
                actual_tags += items
            tags = actual_tags
            tags = [t.lower() for t in tags]
            category = tags[0] if len(tags) > 0 else ""
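            # PRs whose author account has been deleted come back with a null author;
            # GitHub displays such accounts as the "ghost" user.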
            author = pr["author"] if pr["author"] else "ghost"
            author = author.get("login", "") if isinstance(author, dict) else author
            writer.writerow(
                (
                    category,
                    "n/a",
                    item["committedDate"],
                    f'https://github.com/apache/tvm/pull/{pr["number"]}',
                    author,
                    "/".join(tags),
                    pr["title"].replace(",", " "),
                    pr["additions"],
                    pr["deletions"],
                    1 if threshold_filter(pr) else 0,
                    pr["changedFiles"],
                )
            )
    print(f"{filename} generated!")


if __name__ == "__main__":
    help = "List out commits with attached PRs since a certain commit"
    parser = argparse.ArgumentParser(description=help)
    parser.add_argument("--from-commit", help="commit to start checking PRs from")
    parser.add_argument("--to-commit", help="commit at which to stop checking PRs")
    parser.add_argument(
        "--threshold", default=0, help="sum of additions + deletions to consider large, such as 150"
    )
    parser.add_argument(
        "--skip-query", action="store_true", help="don't query GitHub and instead use cache file"
    )
    args = parser.parse_args()
    user = "apache"
    repo = "tvm"
    threshold = int(args.threshold)

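    # Raw query results are cached on disk so the (slow) GraphQL pagination can be
    # skipped on re-runs with --skip-query.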
    cache = Path("out.pkl")
    if not args.skip_query:
        fetch_pr_data(args, cache, user, repo)

    with open(cache, "rb") as f:
        data = pickle.load(f)

    print(f"Found {len(data)} PRs")

    write_csv(
        filename="out_pr_gathered.csv",
        data=data,
        threshold_filter=lambda pr: pr["additions"] + pr["deletions"] > threshold,
    )