| #!/usr/bin/env python3 |
| ############################################################################ |
| # tools/codeowners/codeoweners.py |
| # |
| # SPDX-License-Identifier: Apache-2.0 |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. The |
| # ASF licenses this file to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance with the |
| # License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| # License for the specific language governing permissions and limitations |
| # under the License. |
| # |
| ############################################################################ |
| |
| import os |
| import re |
| import subprocess |
| import sys |
| from multiprocessing import Pool |
| from typing import TypeAlias |
| |
# Size of the worker pool used to collect ownership data
N_PROCESSES: int = 20

# How many of the most active authors are kept per file in the CODEOWNERS output
TOP_N_AUTHORS: int = 5

# Name of the git metadata entry expected in the repository root
GIT_FOLDER: str = ".git"

# git invocation that prints, per commit, the quoted author email followed by
# the short change statistics
CHANGE_STATS_CMD: list[str] = ["git", "--no-pager", "log", '--pretty="%ae"', "--shortstat"]

# git invocation that lists every file tracked by the repository
GIT_FILE_CMD: list[str] = [
    "git",
    "ls-files",
]

# Path patterns that are left out of code owner generation
IGNORE_LOCS: list[str] = [
    "Documentation/ReleaseNotes/*",  # Generated release notes
    "**/__init__.py",  # Boilerplate files
    "**/Make.defs",  # Files that aren't really necessary to have owners
    "**/CMakeLists.txt",  # (same as above)
    "*.png",  # Image files from documentation
    "*.jpg",  # Image files from documentation
]
| |
# Type alias for a code owner, who is identified by their email address
CodeOwner: TypeAlias = str
| |
| |
class Ownership:
    """Ties one code owner to one path, along with how many changes they made to it."""

    def __init__(self, owner: CodeOwner, path: str, changes: int) -> None:
        """Records that `owner` has made `changes` changes to `path`."""
        self.path: str = path
        self.owner: CodeOwner = owner
        self.changes: int = changes

    def __repr__(self) -> str:
        # The debug representation is the same as the printable one
        return str(self)

    def __str__(self) -> str:
        return f"Ownership(path='{self.path}', owner='{self.owner}', changes={self.changes})"
| |
| |
# Maps a file path to the ownership records of the contributors to that file
Roster: TypeAlias = dict[str, list[Ownership]]
| |
| |
def parse_change_count(changestr: str) -> int:
    """Turns a comma-separated change summary into the total number of changes it reports.

    The first comma-separated field (the number of files changed) is not counted. From each
    remaining field the first run of digits, if any, is added to the total.
    """

    # Everything after the leading "files changed" field describes line changes
    _files_changed, *line_changes = changestr.split(",")

    numbers = (re.search(r"\d+", field) for field in line_changes)
    return sum(int(found.group()) for found in numbers if found is not None)
| |
| |
def get_owners(path: str) -> list[Ownership]:
    """Gets ownership relationships for `path`.

    Runs `CHANGE_STATS_CMD` for `path` and adds up, per author email, the change counts that
    `parse_change_count` extracts from the statistics line of each of that author's commits.

    Returns one `Ownership` per author that has at least one commit with a statistics line.
    If git produces no output (for instance because the command failed), the list is empty.
    """

    # `--` keeps git from interpreting `path` as a revision or an option
    cmd = CHANGE_STATS_CMD + ["--", path]
    raw_logs = subprocess.run(cmd, capture_output=True, text=True).stdout

    ownerships: dict[CodeOwner, Ownership] = {}

    # Author of the commit whose statistics line has not been seen yet
    pending_owner = None

    for line in raw_logs.split("\n"):
        if line == "":
            continue

        # The pretty format wraps the author email in double quotes, which tells author
        # lines apart from statistics lines. Classifying every line (rather than assuming
        # the log strictly alternates) keeps authors and statistics aligned when git prints
        # no statistics for a commit, e.g. a merge commit shown without a diff.
        if line.startswith('"'):
            pending_owner = line.replace('"', "")
            continue

        # A statistics line that does not follow an author line cannot be attributed
        if pending_owner is None:
            continue

        change_count = parse_change_count(line)
        existing = ownerships.get(pending_owner)
        if existing is None:
            ownerships[pending_owner] = Ownership(
                path=path,
                owner=pending_owner,
                changes=change_count,
            )
        else:
            existing.changes += change_count

        # Each commit has at most one statistics line
        pending_owner = None

    return list(ownerships.values())
| |
| |
def sort_by_changes(owners: list[Ownership]) -> None:
    """Reorders `owners` in place so that the owner with the most changes comes first."""

    def change_count(entry: Ownership) -> int:
        return entry.changes

    owners.sort(key=change_count, reverse=True)
| |
| |
def filter_owners(owners: list[Ownership], ignore: list[CodeOwner]) -> list[Ownership]:
    """Builds a new list holding every entry of `owners` whose owner does not appear in `ignore`."""
    kept: list[Ownership] = []
    for entry in owners:
        if entry.owner in ignore:
            continue
        kept.append(entry)
    return kept
| |
| |
def get_files() -> list[str]:
    """Returns a list of currently tracked files in the git repository.

    Every pattern in `IGNORE_LOCS` is handed to git as an exclude pathspec, so matching files
    are left out. If git prints nothing, the returned list is empty.
    """

    # `:!:` marks a pathspec as an exclusion
    exclusions = [f":!:{loc}" for loc in IGNORE_LOCS]
    cmd = GIT_FILE_CMD + ["--"] + exclusions
    output = subprocess.run(cmd, text=True, capture_output=True).stdout

    # NOTE(review): git may quote paths that contain unusual characters in this output (see
    # `core.quotePath`); such entries are passed on as printed. Confirm whether `-z` is needed.

    # Drop empty entries so that empty output yields [] rather than [""]
    return [line for line in output.strip().split("\n") if line]
| |
| |
def parse_ignore_list(path: str) -> list[CodeOwner]:
    """Parses a file of code owners to ignore into a list.

    The file at `path` is read as UTF-8 text with one code owner per line. Surrounding
    whitespace is removed from each line, and lines that are empty afterwards are skipped so
    that blank lines do not turn into empty owner entries.

    Raises `OSError` if the file cannot be opened.
    """

    with open(path, "r", encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        return [owner for owner in stripped_lines if owner]
| |
| |
def generate_codeowners(roster: Roster) -> None:
    """Writes the `roster` to the console, formatted like a code-owner file.

    Each entry becomes one line: the file path followed by its owners, every item followed
    by a single space.
    """

    for path, entries in roster.items():
        owner_names = [entry.owner for entry in entries]
        # The default separator puts one space between items; `end` supplies the final one
        print(path, *owner_names, end=" \n")
| |
| |
def main() -> None:
    """Prints code-owner entries for the tracked files of the repository to standard output.

    Must be run from the root of the git repository; otherwise an error is reported and the
    process exits with status 1. An optional first command-line argument gives the path to a
    file of author emails that should not be listed as owners.

    For each file returned by `get_files`, the `TOP_N_AUTHORS` authors with the most changes
    are selected, ignored authors are then removed from that selection, and files left without
    any owner are omitted.

    Diagnostics are written to standard error so that standard output only contains the
    generated entries and can be redirected straight into a file.
    """

    # Check that we are running in the repository root
    if GIT_FOLDER not in os.listdir():
        print(
            "You should only run this script from the root of the git repository.",
            file=sys.stderr,
        )
        sys.exit(1)

    ignore_list = []

    if len(sys.argv) < 2:
        print(
            "You can pass in the path to the list of emails to ignore when generating the code owner file. "
            "This prevents these authors from getting auto-requested in PR reviews.",
            file=sys.stderr,
        )
        print("Ex: ./tools/codeowners.py ./tools/codeowners-ignore.txt", file=sys.stderr)
    else:
        ignore_list = parse_ignore_list(sys.argv[1])

    source_files = get_files()

    # Collect the ownership data of all files in parallel; `map` keeps the input order
    with Pool(N_PROCESSES) as p:
        all_ownerships = p.map(get_owners, source_files)

    roster: Roster = {}
    for file, ownerships in zip(source_files, all_ownerships):
        sort_by_changes(ownerships)  # Sort by greatest change count
        ownerships = ownerships[:TOP_N_AUTHORS]  # Only use the top `N`

        # Filter authors who wish to be ignored.
        # NOTE(review): filtering happens after truncation, so a file can end up with fewer
        # than TOP_N_AUTHORS owners even when more contributors exist - confirm this is intended.
        ownerships = filter_owners(ownerships, ignore_list)

        # Skip empty lines since they do nothing of value
        if not ownerships:
            continue

        # Store in roster
        roster[file] = ownerships

    # Print like code-owner file
    generate_codeowners(roster)
| |
| |
# Run the generator only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()