| #!/usr/bin/env python3 |
| |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| # This file contains helper methods used in creating a release. |
| |
| import re |
| import sys |
| from subprocess import Popen, PIPE |
| |
| try: |
| from jira.client import JIRA # noqa: F401 |
| # Old versions have JIRAError in exceptions package, new (0.5+) in utils. |
| try: |
| from jira.exceptions import JIRAError |
| except ImportError: |
| from jira.utils import JIRAError |
| except ImportError: |
| print("This tool requires the jira-python library") |
| print("Install using 'sudo pip3 install jira'") |
| sys.exit(-1) |
| |
| try: |
| from github import Github # noqa: F401 |
| from github import GithubException |
| except ImportError: |
| print("This tool requires the PyGithub library") |
| print("Install using 'sudo pip install PyGithub'") |
| sys.exit(-1) |
| |
| |
# Name of the file that holds the contributors list.
# NOTE(review): not referenced anywhere else in this module; presumably read
# by the release scripts that import these helpers -- confirm against callers.
contributors_file_name = "contributors.txt"
| |
| |
| # Prompt the user to answer yes or no until they do so |
def yesOrNoPrompt(msg):
    """Ask a yes/no question until the user answers exactly "y" or "n".

    The prompt shown is ``"<msg> [y/n]: "``. Any other answer, including an
    empty line, causes the same question to be asked again.

    :param msg: question text displayed in front of the ``[y/n]`` hint
    :return: True if the user answered "y", False if the user answered "n"
    """
    prompt = "%s [y/n]: " % msg
    # Loop rather than recurse: a long run of invalid answers must not be
    # able to exhaust the interpreter's recursion limit.
    while True:
        response = input(prompt)
        if response in ("y", "n"):
            return response == "y"
| |
| |
# Utility functions for running git commands (written with Git 1.8.5)
def run_cmd(cmd):
    """Run a command and return what it wrote to standard output.

    :param cmd: argument list handed to ``Popen`` (program name first)
    :return: the captured stdout, decoded as UTF-8
    """
    process = Popen(cmd, stdout=PIPE)
    # Only stdout is piped, so the second element of the pair is None.
    stdout_bytes, _ = process.communicate()
    return stdout_bytes.decode("utf8")
| |
| |
def run_cmd_error(cmd):
    """Run a command and return what it wrote to standard error.

    Standard output is captured as well, but discarded.

    :param cmd: argument list handed to ``Popen`` (program name first)
    :return: the captured stderr, decoded as UTF-8
    """
    process = Popen(cmd, stdout=PIPE, stderr=PIPE)
    _, stderr_bytes = process.communicate()
    return stderr_bytes.decode("utf8")
| |
| |
def get_date(commit_hash):
    """Return the date git reports for the given commit.

    Runs ``git show --quiet`` with the ``%cd`` pretty-format placeholder and
    returns the command's output unchanged.

    :param commit_hash: revision to pass to ``git show``
    :return: the text printed by git for ``%cd``
    """
    show_date_cmd = ["git", "show", "--quiet", "--pretty=format:%cd", commit_hash]
    return run_cmd(show_date_cmd)
| |
| |
def tag_exists(tag):
    """Return whether ``git show`` can resolve the given tag.

    The decision is based on git's exit status rather than on the wording of
    its error output: git reports an unresolvable revision with a ``fatal:``
    message, so searching stderr for the word "error" does not reliably
    detect a missing tag.

    :param tag: tag name (or any revision) to look up
    :return: True if ``git show <tag>`` exits successfully, False otherwise
    """
    process = Popen(["git", "show", tag], stdout=PIPE, stderr=PIPE)
    # Drain both pipes so git can never block on a full pipe buffer; the
    # output itself is not needed.
    process.communicate()
    return process.returncode == 0
| |
| |
| # A type-safe representation of a commit |
class Commit:
    """Simple record describing one commit.

    Holds the commit hash, the author, the title and, optionally, the number
    of the pull request the commit closes.
    """

    def __init__(self, _hash, author, title, pr_number=None):
        self._hash, self.author = _hash, author
        self.title, self.pr_number = title, pr_number

    def get_hash(self):
        """Return the commit hash."""
        return self._hash

    def get_author(self):
        """Return the commit author."""
        return self.author

    def get_title(self):
        """Return the commit title."""
        return self.title

    def get_pr_number(self):
        """Return the pull request number, or None if there is none."""
        return self.pr_number

    def __str__(self):
        # The fourth field is left empty (but still preceded by a space)
        # when the commit has no pull request number.
        if self.pr_number:
            closes_pr = "(Closes #%s)" % self.pr_number
        else:
            closes_pr = ""
        fields = (self._hash, self.author, self.title, closes_pr)
        return "%s %s %s %s" % fields
| |
| |
| # Return all commits that belong to the specified tag. |
| # |
| # Under the hood, this runs a `git log` on that tag and parses the fields |
| # from the command output to construct a list of Commit objects. Note that |
| # because certain fields reside in the commit description and cannot be parsed |
| # through the GitHub API itself, we need to do some intelligent regex parsing |
| # to extract those fields. |
| # |
| # This is written using Git 1.8.5. |
def get_commits(tag):
    """Return the commits reachable from ``tag`` as a list of Commit objects.

    ``git log`` is run with a custom pretty format in which every commit is
    framed by unique marker strings, so that the hash, author name, title and
    body can be split apart again. The pull request number and the GitHub
    username are extracted from a "Closes #<n> from <user>/" line in the
    commit body. When the recorded author name does not pass
    ``is_valid_author``, the GitHub username is used as the author instead.

    The process exits through ``sys.exit`` if a commit in the git output does
    not contain the expected markers.

    :param tag: revision passed to ``git log``
    :return: list of Commit objects, in the order git printed them
    """
    commit_start_marker = "|=== COMMIT START MARKER ===|"
    commit_end_marker = "|=== COMMIT END MARKER ===|"
    field_end_marker = "|=== COMMIT FIELD END MARKER ===|"
    # Digest = hash, author name and title separated by the field marker;
    # the commit body follows the commit end marker.
    digest_format = field_end_marker.join(["%h", "%an", "%s"])
    log_format = commit_start_marker + digest_format + commit_end_marker + "%b"
    log_output = run_cmd(["git", "log", "--quiet", "--pretty=format:" + log_format, tag])

    parsed = []
    for raw in log_output.split(commit_start_marker):
        if not raw:
            # Splitting leaves an empty chunk in front of the first marker.
            continue
        if raw.count(commit_end_marker) != 1:
            print("Commit end marker not found in commit: ")
            print(raw)
            sys.exit(1)
        # Separate the digest (hash, author, title) from the body.
        digest, body = raw.split(commit_end_marker)
        if digest.count(field_end_marker) != 2:
            sys.exit("Unexpected format in commit: %s" % digest)
        _hash, author, title = digest.split(field_end_marker)
        # The PR number and GitHub username only appear in the commit
        # message itself.
        pr_number = None
        closes = re.search("Closes #([0-9]+) from ([^/\\s]+)/", body)
        if closes:
            pr_number, github_username = closes.groups()
            # Fall back to the GitHub username when the author name is not
            # valid, so it can be translated properly later.
            if not is_valid_author(author):
                author = github_username
        parsed.append(Commit(_hash, author.strip(), title, pr_number))
    return parsed
| |
| # Maintain a mapping for translating issue types to contributions in the release notes |
| # This serves an additional function of warning the user against unknown issue types |
| # Note: This list is partially derived from this link: |
| # https://issues.apache.org/jira/plugins/servlet/project-config/SPARK/issuetypes |
| # Keep these in lower case |
known_issue_types = {
    "bug": "bug fixes",
    "build": "build fixes",
    "dependency upgrade": "build fixes",
    "improvement": "improvements",
    "new feature": "new features",
    "documentation": "documentation",
    "test": "test",
    # Tasks and sub-tasks are reported in the same category as improvements.
    # They must use the identical string ("improvements"), otherwise the
    # contributions end up split across two near-duplicate categories.
    "task": "improvements",
    "sub-task": "improvements",
}
| |
| # Maintain a mapping for translating component names when creating the release notes |
| # This serves an additional function of warning the user against unknown components |
| # Note: This list is largely derived from this link: |
| # https://issues.apache.org/jira/plugins/servlet/project-config/SPARK/components |
# Display name shared by every component that is reported under "Core"
CORE_COMPONENT = "Core"
# Keys are component names in lower case (translate_component lower-cases its
# argument before the lookup); values are the names used in the output.
known_components = {
    "block manager": CORE_COMPONENT,
    "build": CORE_COMPONENT,
    "deploy": CORE_COMPONENT,
    "documentation": CORE_COMPONENT,
    "examples": CORE_COMPONENT,
    "graphx": "GraphX",
    "input/output": CORE_COMPONENT,
    "java api": "Java API",
    # Both the abbreviation and the full name map to the same display name
    "k8s": "Kubernetes",
    "kubernetes": "Kubernetes",
    "mesos": "Mesos",
    # "ml" and "mllib" are both reported as MLlib
    "ml": "MLlib",
    "mllib": "MLlib",
    "project infra": "Project Infra",
    "pyspark": "PySpark",
    "shuffle": "Shuffle",
    "spark core": CORE_COMPONENT,
    "spark shell": CORE_COMPONENT,
    "sql": "SQL",
    "streaming": "Streaming",
    "web ui": "Web UI",
    "windows": "Windows",
    "yarn": "YARN"
}
| |
| |
| # Translate issue types using a format appropriate for writing contributions |
| # If an unknown issue type is encountered, warn the user |
def translate_issue_type(issue_type, issue_id, warnings):
    """Map an issue type to the wording used when listing contributions.

    The lookup in ``known_issue_types`` is case-insensitive. For an unknown
    type a message is appended to ``warnings`` and the lower-cased type is
    returned unchanged.

    :param issue_type: issue type name, in any letter case
    :param issue_id: identifier mentioned in the warning message
    :param warnings: list that receives a message for an unknown type
    :return: the translated type, or the lower-cased input if it is unknown
    """
    normalized = issue_type.lower()
    if normalized not in known_issue_types:
        warnings.append("Unknown issue type \"%s\" (see %s)" % (normalized, issue_id))
        return normalized
    return known_issue_types[normalized]
| |
| |
| # Translate component names using a format appropriate for writing contributions |
| # If an unknown component is encountered, warn the user |
def translate_component(component, commit_hash, warnings):
    """Map a component name to the wording used when listing contributions.

    The lookup in ``known_components`` is case-insensitive. For an unknown
    component a message is appended to ``warnings`` and the lower-cased name
    is returned unchanged.

    :param component: component name, in any letter case
    :param commit_hash: commit identifier mentioned in the warning message
    :param warnings: list that receives a message for an unknown component
    :return: the translated name, or the lower-cased input if it is unknown
    """
    normalized = component.lower()
    if normalized not in known_components:
        warnings.append("Unknown component \"%s\" (see %s)" % (normalized, commit_hash))
        return normalized
    return known_components[normalized]
| |
| |
| # Parse components in the commit message |
| # The returned components are already filtered and translated |
def find_components(commit, commit_hash):
    """Return the known components named in square brackets in a commit message.

    Every ``[...]`` tag in the message is compared, case-insensitively and
    with surrounding whitespace removed, against ``known_components``. Tags
    that are not known components are ignored without a warning.

    :param commit: commit message (or title) to scan
    :param commit_hash: commit identifier forwarded to translate_component
    :return: list of translated component names, in order of appearance
    """
    # Capture only the text between the brackets. Matching the brackets
    # themselves would yield strings like "[sql]", which are never keys of
    # known_components. Anything except brackets is allowed inside so that
    # multi-word components such as "web ui" can be recognised too.
    tags = re.findall(r"\[([^\[\]]*)\]", commit.lower())
    found = []
    for tag in tags:
        name = tag.strip()
        if name in known_components:
            # Only known names reach this point, so no warnings are produced
            # and a throw-away list is passed.
            found.append(translate_component(name, commit_hash, []))
    return found
| |
| |
| # Join a list of strings in a human-readable manner |
| # e.g. ["Juice"] -> "Juice" |
| # e.g. ["Juice", "baby"] -> "Juice and baby" |
| # e.g. ["Juice", "baby", "moon"] -> "Juice, baby, and moon" |
def nice_join(str_list):
    """Join strings into a human-readable enumeration.

    No items give "", one item is returned as is, two items are joined with
    " and ", and three or more are joined with commas plus a final ", and ".

    :param str_list: iterable of strings (a list, a set, ...)
    :return: the joined text
    """
    items = list(str_list)  # the argument may be a set
    count = len(items)
    if count == 0:
        return ""
    if count == 1:
        return items[0]
    *head, last = items
    # Two items get a plain "and"; longer lists get a comma before it.
    separator = " and " if count == 2 else ", and "
    return ", ".join(head) + separator + last
| |
| |
| # Return the full name of the specified user on GitHub |
| # If the user doesn't exist, return None |
def get_github_name(author, github_client):
    """Look up the ``name`` of a user through the given GitHub client.

    :param author: user identifier passed to ``github_client.get_user``
    :param github_client: client object; when it is falsy no lookup is made
    :return: the user's ``name`` attribute, or None when there is no client
        or the lookup fails with status 404
    :raises GithubException: for any failure whose status is not 404
    """
    if not github_client:
        return None
    try:
        user = github_client.get_user(author)
        return user.name
    except GithubException as e:
        # A 404 means the user was not found; anything else is a real error.
        if e.status == 404:
            return None
        raise
| |
| |
| # Return the full name of the specified user on JIRA |
| # If the user doesn't exist, return None |
def get_jira_name(author, jira_client):
    """Look up the ``displayName`` of a user through the given JIRA client.

    :param author: user identifier passed to ``jira_client.user``
    :param jira_client: client object; when it is falsy no lookup is made
    :return: the user's ``displayName`` attribute, or None when there is no
        client or the lookup fails with status code 404
    :raises JIRAError: for any failure whose status code is not 404
    """
    if not jira_client:
        return None
    try:
        user = jira_client.user(author)
        return user.displayName
    except JIRAError as e:
        # A 404 means the user was not found; anything else is a real error.
        if e.status_code == 404:
            return None
        raise
| |
| |
| # Return whether the given name is in the form <First Name><space><Last Name> |
def is_valid_author(author):
    """Return whether ``author`` looks like a real "First Last" name.

    A name is accepted when it is non-empty, contains at least one space and
    contains no digit.

    :param author: author name to check; may be None or empty
    :return: True if the name is accepted, False otherwise
    """
    if not author or " " not in author:
        return False
    return re.search("[0-9]", author) is None
| |
| |
| # Capitalize the first letter of each word in the given author name |
def capitalize_author(author):
    """Upper-case the first letter of every word in an author name.

    The name is split on single spaces; empty pieces (from leading, trailing
    or repeated spaces) are dropped, so the result is joined with exactly one
    space between words. The rest of each word is left untouched.

    :param author: author name; may be None or empty
    :return: the capitalized name, or None if ``author`` is empty or None
    """
    if not author:
        return None
    capitalized = []
    for word in author.split(" "):
        if word:
            capitalized.append(word[0].capitalize() + word[1:])
    return " ".join(capitalized)