blob: 33e543a77f3b0b370ebdde43b238505ae062510f [file] [log] [blame]
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This tool is based on the Spark merge_spark_pr script:
# https://github.com/apache/spark/blob/master/dev/merge_spark_pr.py
from __future__ import annotations
import re
import sys
from collections import Counter, defaultdict
import git
import rich_click as click
from github import Github
from github.Issue import Issue
from github.PullRequest import PullRequest
from packaging import version
# Keys of one parsed commit record. The order mirrors the placeholders in
# GIT_LOG_FORMAT because the two are zipped together when the log is parsed.
GIT_COMMIT_FIELDS = ["id", "author_name", "author_email", "date", "subject", "body"]

# ``git log --format`` template: the placeholders are separated by the 0x1f
# byte and every commit record is terminated by the 0x1e byte.
GIT_LOG_FORMAT = "%x1f".join(("%h", "%an", "%ae", "%ad", "%s", "%b")) + "%x1e"

# Matches a commit subject that ends with "(#<1-6 digits>)" and captures "#<digits>".
pr_title_re = re.compile(r".*\((#[0-9]{1,6})\)$")

# Terminal colour used for each capitalised GitHub issue state.
STATUS_COLOR_MAP = {"Closed": "green", "Open": "red"}

# Section / issue type used when no "type:" label is found.
DEFAULT_SECTION_NAME = "Uncategorized"
def get_commits_between(
    repo,
    previous_version,
    target_version,
    files=None,
):
    """Return the commits of ``previous_version..target_version`` as dicts.

    Runs ``git log`` through ``repo.git`` using ``GIT_LOG_FORMAT`` and turns every
    record of the output into a dict keyed by ``GIT_COMMIT_FIELDS``.

    :param repo: object exposing ``repo.git.log(*args)`` (callers in this file
        pass a ``git.Repo``)
    :param previous_version: revision on the left side of the ``..`` range
    :param target_version: revision on the right side of the ``..`` range
    :param files: optional list of paths; when given they are passed to
        ``git log`` after ``--`` to restrict the log to those paths
    :return: list with one dict per commit, in the order git printed them; an
        empty list when git printed nothing
    """
    log_args = [f"--format={GIT_LOG_FORMAT}", previous_version + ".." + target_version]
    if files:
        log_args.append("--")
        # Every path has to be its own argument. Joining them with a space would
        # hand git a single, non-existent path as soon as two files are given.
        log_args.extend(files)
    raw_log = repo.git.log(*log_args)

    commits = []
    for record in raw_log.strip("\n\x1e").split("\x1e"):
        record = record.strip()
        if not record:
            # Empty git output still splits into one empty chunk; skipping it
            # avoids returning a bogus ``{"id": ""}`` commit.
            continue
        values = record.split("\x1f")
        # str.strip() counts \x1f as whitespace, so empty trailing fields (for
        # example an empty body) were stripped away above. Pad them back so that
        # every record carries all the keys of GIT_COMMIT_FIELDS.
        values.extend([""] * (len(GIT_COMMIT_FIELDS) - len(values)))
        commits.append(dict(zip(GIT_COMMIT_FIELDS, values)))
    return commits
def style_issue_status(status):
    """Return ``status`` cut/padded to six characters, coloured when known.

    :param status: capitalised issue state, e.g. ``"Open"`` or ``"Closed"``
    :return: the six-character status text; wrapped with ``click.style`` in
        the colour from ``STATUS_COLOR_MAP`` when the status is listed there
    """
    cell = status[:6].ljust(6)
    colour = STATUS_COLOR_MAP.get(status)
    if colour is None:
        return cell
    return click.style(cell, colour)
def get_issue_type(issue):
    """Derive the changelog section of an issue from its labels.

    Labels are scanned in order and the first decisive one wins:

    * a label starting with ``type:`` yields the text after that prefix,
      stripped of surrounding whitespace;
    * the label ``changelog:skip`` yields ``"(skip)"``.

    :param issue: object with a ``labels`` attribute whose items have a ``name``
    :return: the section name, or ``DEFAULT_SECTION_NAME`` when ``labels`` is
        empty/falsy or no label is decisive
    """
    label_prefix = "type:"
    for label in issue.labels or ():
        if label.name.startswith(label_prefix):
            # Cut off only the leading prefix. str.replace() would also delete
            # any later "type:" inside the label (e.g. "type:prototype:ui").
            return label.name[len(label_prefix):].strip()
        if label.name == "changelog:skip":
            return "(skip)"
    return DEFAULT_SECTION_NAME
def get_commit_in_main_associated_with_pr(repo: git.Repo, issue: Issue) -> str | None:
    """Look up the commit on ``origin/main`` that belongs to a pull request.

    The git log of ``origin/main`` is searched for ``#<issue number>`` first. Only
    when that finds nothing is the pull request itself consulted, and its merge
    commit SHA is used if it reports being merged.

    :param repo: repository whose ``git.log`` is queried
    :param issue: GitHub issue; anything without ``pull_request`` set is ignored
    :return: the raw ``git log`` output (full hashes, ``%H``) when the search
        matched, else the PR's ``merge_commit_sha`` if merged, else ``None``
    """
    # NOTE(review): the grep is a plain substring pattern, so "#12" presumably
    # also matches a message mentioning "#123" - confirm whether that matters.
    if not issue.pull_request:
        return None

    grep_hits = repo.git.log(f"--grep=#{issue.number}", "origin/main", "--format=%H")
    if grep_hits:
        return grep_hits

    pull: PullRequest = issue.as_pull_request()
    if pull.is_merged():
        return pull.merge_commit_sha
    return None
def is_cherrypicked(repo: git.Repo, issue: Issue, previous_version: str | None = None) -> bool:
    """Tell whether the git log mentions ``#<issue number>``.

    :param repo: repository whose ``git.log`` is queried (no branch is passed,
        so git decides which history is searched)
    :param issue: GitHub issue or PR whose number is searched for
    :param previous_version: optional revision; when given the search is limited
        to the range ``<previous_version>..``
    :return: ``True`` when ``git log`` printed at least one hash, else ``False``
    """
    revision_range = [previous_version + ".."] if previous_version else []
    matching_hashes = repo.git.log("--format=%H", f"--grep=#{issue.number}", *revision_range)
    return bool(matching_hashes)
def is_pr(issue: Issue) -> bool:
    """Return ``True`` when the issue's ``html_url`` is an apache/airflow pull-request URL."""
    pull_marker = "apache/airflow/pull/"
    return pull_marker in issue.html_url
def files_touched(repo, commit_id: str) -> list[str]:
    """Return the paths reported in the stats of a commit.

    :param repo: object exposing ``repo.commit(commit_id)`` (a ``git.Repo`` for
        the callers in this file)
    :param commit_id: identifier of the commit to inspect
    :return: a new list holding the keys of ``commit.stats.files``
    """
    real_commit = repo.commit(commit_id)
    # Materialise the keys so the result really is the list promised by the
    # annotation instead of a live dict view.
    return list(real_commit.stats.files.keys())
def is_core_commit(files: list[str]) -> bool:
    """Tell whether a commit changed anything outside the known non-core paths.

    :param files: paths changed by the commit
    :return: ``True`` as soon as one path matches none of the non-core
        prefixes; ``False`` when every path is non-core or ``files`` is empty
    """
    # We list out things that _aren't_ core files,
    # and we want to know if the commit changes anything
    # outside of these designated-non-core files.
    # Every entry is matched as a plain string prefix (no globbing).
    non_core_prefixes = (
        # Providers
        "airflow/providers/",
        "airflow/provider.yaml.schema.json",
        "provider_packages/",
        "docs/apache-airflow-providers-",
        # chart
        "chart/",
        "docs/helm-chart/",
        "clients",
        # non-released docs
        "COMMITTERS.rst",
        "CONTRIBUTING.rst",
        "CONTRIBUTORS_QUICK_START.rst",
        "IMAGES.rst",
        "LOCAL_VIRTUALENV.rst",
        "INTHEWILD.md",
        "INSTALL",
        "README.md",
        "BREEZE.rst",
        "CI.rst",
        "CI_DIAGRAMS.md",
        "STATIC_CODE_CHECKS.rst",
        "images/",
        "TESTING.rst",
        "codecov.yml",
        "kubernetes_tests/",
        ".github/",
        ".pre-commit-config.yaml",
        "yamllint-config.yml",
        ".markdownlint.yml",
        "tests/",
        "dev/",
        "Dockerfile",
        "Dockerfile.ci",
        ".hadolint.yaml",
        "scripts/",
        "docs/build_docs.py",
        "docs/start_doc_server.sh",
        ".devcontainer/",
        "docker-context-files/",
        # Misc
        ".dockerignore",
        "docs/spelling_wordlist.txt",
        # No trailing "*": with prefix matching a literal asterisk never matched
        # any real path below this directory.
        "docs/integration-logos/",
        "docs/exts/",
        "docs/docker-stack",
        "docs/README.rst",
        "docker_tests/",
        ".asf.yaml",
        ".mailmap",
        "breeze-legacy",
        "breeze-complete",
        ".rat-excludes",
        ".gitattributes",
        ".gitpod.yml",
        "generated/",
    )
    for file in files:
        # str.startswith accepts a tuple and checks all prefixes in one call.
        if file.startswith(non_core_prefixes):
            continue
        # Handle renaming. Form: {old_name => new_name}somename.py
        # Either side of the arrow counting as non-core makes the file non-core.
        if file.startswith("{"):
            renamed_parts = file[1:].split(" => ")
            if any(part.strip().startswith(non_core_prefixes) for part in renamed_parts):
                continue
        return True
    return False
def print_changelog(sections):
    """Print every changelog section as a heading followed by a bullet list.

    For each section the title is printed, then a row of ``"`` characters of
    the same length, then one ``- <line>`` per entry and finally a blank line.
    The section named ``"(skip)"`` is left out.

    :param sections: mapping of section title to the list of its entries
    """
    for title, entries in sections.items():
        if title != "(skip)":
            underline = '"' * len(title)
            print(title, underline, sep="\n")
            for entry in entries:
                print("-", entry)
            print()
# Root click group of this script; the sub-commands below register themselves
# on it through ``@cli.command``. The docstring is left untouched because click
# shows a group's docstring as its help text when no ``help`` is passed.
@click.group()
def cli():
    r"""
    This tool should be used by Airflow Release Manager to verify what GitHub issues
    were merged in the current working branch.
    airflow-github compare <target_version> <github_token>
    """
@cli.command(short_help="Compare a GitHub target version against git merges")
@click.argument("target_version")
@click.argument("github-token", envvar="GITHUB_TOKEN")
@click.option(
    "--previous-version",
    "previous_version",
    help="Specify the previous tag on the working branch to limit"
    " searching for few commits to find the cherry-picked commits",
)
@click.option("--unmerged", "show_uncherrypicked_only", help="Show unmerged PRs only", is_flag=True)
def compare(target_version, github_token, previous_version=None, show_uncherrypicked_only=False):
    # Prints one table row per issue/PR of the "Airflow <target_version>"
    # milestone, telling whether it is already referenced in the git log, and
    # ends with a summary line.
    # (Kept as comments: a docstring would become the command's help text.)
    repo = git.Repo(".", search_parent_directories=True)
    github_handler = Github(github_token)

    # Pull requests are fetched first, plain issues second.
    milestone_queries = (
        f'repo:apache/airflow milestone:"Airflow {target_version}" is:pull-request ',
        f'repo:apache/airflow milestone:"Airflow {target_version}" is:issue ',
    )
    milestone_issues: list[Issue] = [
        found for query in milestone_queries for found in github_handler.search_issues(query)
    ]

    num_cherrypicked = 0
    num_uncherrypicked = Counter()

    # Format spec cheat sheet: ":>6" right-aligns in 6 columns, ":<5" left-aligns
    # in 5 columns, ":<83.83" left-aligns in 83 columns and truncates to 83
    # characters, "!s" converts the value with str() first.
    formatstr = "{number:>6} | {typ!s:<5} | {changelog!s:<13} | {status!s} | {title:<83.83} | {merged:<6} | {commit:>7.7} | {url}"

    header = dict(
        number="NUMBER",
        typ="TYPE",
        changelog="CHANGELOG",
        status="STATUS".ljust(6),
        title="TITLE",
        merged="MERGED",
        commit="COMMIT",
        url="URL",
    )
    print(formatstr.format(**header))

    for issue in milestone_issues:
        commit_in_main = get_commit_in_main_associated_with_pr(repo, issue)
        status = issue.state.capitalize()
        issue_is_pr = is_pr(issue)

        # Work out the MERGED column, skipping rows hidden by --unmerged.
        if is_cherrypicked(repo, issue, previous_version):
            num_cherrypicked += 1
            if show_uncherrypicked_only:
                continue
            cherrypicked = click.style("Yes".ljust(6), "green")
        elif issue_is_pr:
            num_uncherrypicked[status] += 1
            cherrypicked = click.style("No".ljust(6), "red")
        elif show_uncherrypicked_only:
            # Plain issues are not shown when only unmerged PRs are wanted.
            continue
        else:
            cherrypicked = ""

        row = formatstr.format(
            number=issue.number,
            typ="PR" if issue_is_pr else "Issue",
            changelog=get_issue_type(issue) if issue_is_pr else "",
            status=style_issue_status(status),
            title=issue.title,
            url=issue.html_url,
            merged=cherrypicked,
            commit=commit_in_main or "",
        )
        print(row)

    print(
        f"Commits on branch: {num_cherrypicked:d}, {sum(num_uncherrypicked.values()):d} ({dict(num_uncherrypicked)}) yet to be cherry-picked"
    )
@cli.command(short_help="Build a CHANGELOG grouped by GitHub Issue type")
@click.argument("previous_version")
@click.argument("target_version")
@click.argument("github-token", envvar="GITHUB_TOKEN")
def changelog(previous_version, target_version, github_token):
    # Groups the subjects of the commits in previous_version..target_version by
    # the issue type of their PR and prints the result with print_changelog().
    #  * Commits whose subject carries no "(#NNN)" reference land in
    #    DEFAULT_SECTION_NAME.
    #  * Commits touching no core file are left out.
    #  * "bug-fix", "doc-only" and "misc/internal" commits are left out when
    #    ../RELEASE_NOTES.rst already has a line ending in their "(#NNN)".
    # (Kept as comments: a docstring would become the command's help text.)
    repo = git.Repo(".", search_parent_directories=True)
    # Get a list of issues/PRs that have been committed on the current branch.
    log = get_commits_between(repo, previous_version, target_version)

    gh = Github(github_token)
    gh_repo = gh.get_repo("apache/airflow")
    sections = defaultdict(list)
    deduplicated_types = ("bug-fix", "doc-only", "misc/internal")
    # Read lazily and only once: the file is needed (and has to exist) only if
    # a commit of one of the de-duplicated types shows up.
    release_note_lines = None

    for commit in log:
        tickets = pr_title_re.findall(commit["subject"])
        if not tickets:
            sections[DEFAULT_SECTION_NAME].append(commit["subject"])
            continue

        issue = gh_repo.get_issue(number=int(tickets[0][1:]))
        issue_type = get_issue_type(issue)
        files = files_touched(repo, commit["id"])
        if not is_core_commit(files):
            continue

        if issue_type in deduplicated_types:
            if release_note_lines is None:
                with open("../RELEASE_NOTES.rst", encoding="utf-8") as file:
                    # Drop the line endings, otherwise endswith() can never match.
                    release_note_lines = [line.rstrip() for line in file]
            # Compare against the PR reference "(#NNN)" taken from the subject;
            # commit["id"] is an abbreviated commit hash, not a PR number.
            pr_reference = f"({tickets[0]})"
            if any(line.endswith(pr_reference) for line in release_note_lines):
                continue

        # Exactly one entry per commit (not one per release-notes line).
        sections[issue_type].append(commit["subject"])

    print_changelog(sections)
@cli.command(short_help="Find merged PRs that still need to be categorized for the changelog")
@click.argument("previous_version")
@click.argument("target_version")
@click.option("--show-skipped", is_flag=True)
@click.option("--show-files", is_flag=True)
@click.argument("github-token", envvar="GITHUB_TOKEN")
def needs_categorization(previous_version, target_version, show_skipped, show_files, github_token):
    # Lists the commits in previous_version..target_version whose PR resolves to
    # DEFAULT_SECTION_NAME, plus the commits lacking a "(#NNN)" reference.
    # (Kept as comments: a docstring would become the command's help text.)
    repo = git.Repo(".", search_parent_directories=True)
    log = get_commits_between(repo, previous_version, target_version)

    gh = Github(github_token)
    gh_repo = gh.get_repo("apache/airflow")

    for commit in log:
        subject = commit["subject"]
        tickets = pr_title_re.findall(subject)
        if not tickets:
            print(f"Commit '{commit['id']}' is missing PR number: {commit['subject']}")
            continue

        issue = gh_repo.get_issue(number=int(tickets[0][1:]))
        if get_issue_type(issue) != DEFAULT_SECTION_NAME:
            # Already categorised, nothing to report.
            continue

        files = files_touched(repo, commit["id"])
        if is_core_commit(files):
            print(f"{commit['subject']}: {issue.html_url}")
        elif show_skipped:
            print(f"**** {commit['subject']}: Skipping - not a core commit")
        else:
            continue

        # Both reported cases optionally list the files of the commit.
        if show_files:
            for f in files:
                print(f"\t{f}")
@cli.command(
    name="api-clients-policy",
    help="Compare two airflow core release tags and determine if API clients need to be released.",
)
@click.argument("previous_version")
@click.argument("target_version")
def api_clients_policy(previous_version, target_version):
    # Major and minor version bumps always call for a client release; for two
    # different patch versions the git history of the OpenAPI spec decides.
    p_version = version.parse(previous_version)
    t_version = version.parse(target_version)

    if p_version.major != t_version.major:
        print("This is a major release, API clients should also be released.")
        return
    if p_version.minor != t_version.minor:
        print("This is a minor release, API clients should also be released.")
        return
    if p_version == t_version:
        print("Both versions are identical")
        return

    repo = git.Repo(".", search_parent_directories=True)
    spec_path = f"{repo.working_dir}/airflow/api_connexion/openapi/v1.yaml"
    log = get_commits_between(repo, previous_version, target_version, files=[spec_path])

    clients_need_release = False
    for commit in log:
        # Commits whose subject contains "update airflow version to" do not count.
        if "update airflow version to" in commit["subject"].lower():
            continue
        clients_need_release = True
        print(f"Commit '{commit['id']}' updated the OpenAPI spec, PR number: {commit['subject']}")

    if not clients_need_release:
        print(
            "API clients don't need to be released because the API spec hasn't changed between those two "
            "patch versions"
        )
    else:
        print(f"API clients need to be released because the API spec has changed since '{previous_version}'")
if __name__ == "__main__":
    import doctest

    # Run the doctests of this module first and stop with exit status -1 when
    # any of them fails; otherwise hand over to the click command group.
    doctest_outcome = doctest.testmod()
    if doctest_outcome.failed:
        sys.exit(-1)
    cli()