| #!/usr/bin/env python3 |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
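#
# Generate categorized release-note markdown from a CSV of PRs. Typical usage
# (illustrative; the script and CSV file names here are assumptions):
#
#   python gather_prs.py                        # writes the out.pkl PR cache
#   python make_notes.py --notes prs.csv        # plain "#1234" PR references
#   python make_notes.py --notes prs.csv --is-pr-with-link   # hyperlinked refs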
| |
| import argparse |
| import pickle |
| from pathlib import Path |
| import csv |
| import sys |
| import re |
| from collections import defaultdict |
| |
| REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent |
| sys.path.append(str(REPO_ROOT / "tests" / "scripts")) |
| sys.path.append(str(REPO_ROOT / "tests" / "scripts" / "github")) |
| sys.path.append(str(REPO_ROOT / "tests" / "scripts" / "jenkins")) |
| |
# Maps lowercase PR title tags to release-note categories, so PRs sharing a tag
# land under the same heading.
| TAG_DICT = { |
| "metaschedule": "MetaSchedule", |
| "cuda": "cuda & cutlass & tensorrt", |
| "cutlass": "cuda & cutlass & tensorrt", |
| "tensorrt": "cuda & cutlass & tensorrt", |
| "hexagon": "Hexagon", |
| "metal": "Metal", |
| "vulkan": "Vulkan", |
| "clml": "OpenCL & CLML", |
| "opencl": "OpenCL & CLML", |
| "openclml": "OpenCL & CLML", |
| "adreno": "Adreno", |
| "acl": "ArmComputeLibrary", |
| "rocm": "ROCm", |
| "crt": "CRT", |
| "web": "web", |
| "wasm": "web", |
| "runtime": "Runtime", |
| "aot": "AOT", |
| "arith": "Arith", |
| "byoc": "BYOC", |
| "community": "Community", |
| "tensorir": "TIR", |
| "tir": "TIR", |
| "tensorflow": "Frontend", |
| "tflite": "Frontend", |
| "pytorch": "Frontend", |
| "torch": "Frontend", |
| "keras": "Frontend", |
| "frontend": "Frontend", |
| "onnx": "Frontend", |
| "roofline": "Misc", |
| "rpc": "Misc", |
| "transform": "Misc", |
| "tophub": "Misc", |
| "ux": "Misc", |
| "APP": "Misc", |
| "docker": "Docker", |
| "doc": "Docs", |
| "docs": "Docs", |
| "llvm": "LLVM", |
| "sve": "LLVM", |
| "ci": "CI", |
| "test": "CI", |
| "tests": "CI", |
| "testing": "CI", |
| "unittest": "CI", |
| "bugfix": "BugFix", |
| "fix": "BugFix", |
| "bug": "BugFix", |
| "hotfix": "BugFix", |
| "qnn": "Relay", |
| "quantization": "Relay", |
| "relax": "Relax", |
| "unity": "Relax", |
| "transform": "Relax", |
| "kvcache": "Relax", |
| "s_tir": "S-TIR", |
| "disco": "Disco", |
| "tvmscript": "TVMScript", |
| "tvmscripts": "TVMScript", |
| "tvmc": "TVMC", |
| "topi": "TOPI", |
| } |
| |
| |
| def strip_header(title: str, header: str) -> str: |
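    """Remove the first case-insensitive occurrence of ``header`` from ``title``.

    Illustrative example: strip_header("[Hexagon] Fix crash", "[Hexagon]")
    returns "Fix crash".
    """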
| pos = title.lower().find(header.lower()) |
| if pos == -1: |
| return title |
| |
| return title[0:pos] + title[pos + len(header) :].strip() |
| |
| |
def sprint(*args):
    # Log to stderr so the markdown emitted on stdout stays clean.
    print(*args, file=sys.stderr)
| |
| |
| def create_pr_dict(cache: Path): |
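    """Index PRs by number from the pickled cache written by gather_prs.py.

    Each cache entry is assumed to follow the GitHub GraphQL commit shape used
    below: an "associatedPullRequests" node list whose nodes carry "number"
    and "title". Commits with zero or multiple associated PRs are skipped.
    """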
| with open(cache, "rb") as f: |
| data = pickle.load(f) |
| |
    # Log one sample cache entry to stderr so its shape can be sanity-checked.
    sprint(data[1])
| pr_dict = {} |
| for item in data: |
| prs = item["associatedPullRequests"]["nodes"] |
| if len(prs) != 1: |
| continue |
| |
| pr = prs[0] |
| pr_dict[pr["number"]] = pr |
| return pr_dict |
| |
| |
| def categorize_csv_file(csv_path: str): |
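    """Group PR numbers as category -> subject -> [pr_number, ...].

    Expected CSV columns (per the lookups below): "pr_title_tags" holding
    "/"-separated tags, "subject", and "url" ending in the PR number.
    """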
| headings = defaultdict(lambda: defaultdict(list)) |
| sprint("Opening CSV") |
| with open(csv_path) as f: |
| input_file = csv.DictReader(f) |
| |
        i = 0
        for row in input_file:
            # str.split never yields an empty list ("" splits to [""]), so
            # drop empty tags explicitly and fall back to "misc".
            tags = [t for t in row["pr_title_tags"].split("/") if t] or ["misc"]

            # Map tags to categories and prefer any specific category over
            # "Misc"; sorting makes the pick deterministic (sets are unordered).
            categories = {TAG_DICT.get(t.lower(), "Misc") for t in tags} - {"Misc"}
            category = sorted(categories)[0] if categories else "Misc"
| |
| subject = row["subject"].strip() |
| pr_number = row["url"].split("/")[-1] |
| |
| if category == "" or subject == "": |
| sprint(f"Skipping {i}th pr with number: {pr_number}, row: {row}") |
| continue |
| |
| headings[category][subject].append(pr_number) |
| i += 1 |
| return headings |
| |
| |
| if __name__ == "__main__": |
| help = "List out commits with attached PRs since a certain commit" |
| parser = argparse.ArgumentParser(description=help) |
| parser.add_argument( |
| "--notes", required=True, help="csv or markdown file of categorized PRs in order" |
| ) |
| parser.add_argument( |
| "--is-pr-with-link", |
| required=False, |
| help="exported pr number with hyper-link for forum format", |
| ) |
| parser.add_argument( |
| "--convert-with-link", |
| required=False, |
| help="make PR number in markdown file owning hyper-link", |
| ) |
| args = parser.parse_args() |
| user = "apache" |
| repo = "tvm" |
| |
    if args.convert_with_link:
        with open(args.notes, "r") as f:
            lines = f.readlines()
        # re.sub rewrites every PR reference on a line and cannot corrupt a
        # longer number sharing a prefix (e.g. #12 vs. #123), which a plain
        # str.replace of the first match could.
        formatted = [
            re.sub(
                r"#(\d+)",
                lambda m: f"[#{m.group(1)}](https://github.com/{user}/{repo}/pull/{m.group(1)})",
                line,
            )
            for line in lines
        ]
        print("".join(formatted))
        sys.exit(0)
| |
| # 1. Create PR dict from cache file |
| cache = Path("out.pkl") |
| if not cache.exists(): |
| sprint("run gather_prs.py first to generate out.pkl") |
        sys.exit(1)
| pr_dict = create_pr_dict(cache) |
| |
    # 2. Categorize the CSV file into a dict keyed by category and subject (sub-category)
| headings = categorize_csv_file(args.notes) |
| |
    # 3. Sort categories alphabetically, forcing "Misc" to the end. Python's
    # sort is stable, so the alphabetical order survives the second pass.
    def sorter(x):
        return 1 if x == "Misc" else 0

    keys = sorted(sorted(headings.keys()), key=sorter)
| |
    # 4. Generate markdown by looping over the categorized CSV dict
    def pr_title(number, heading):
        try:
            title = pr_dict[int(number)]["title"]
        except (KeyError, ValueError):
            sprint(f"PR #{number} not found in out.pkl; the cache does not match the CSV file.")
            sys.exit(1)
        return strip_header(title, heading)
| |
| output = "" |
| for key in keys: |
| value = headings[key] |
| if key == "DO NOT INCLUDE": |
| continue |
| value = dict(value) |
| output += f"### {key}\n" |
| misc = [] |
| misc += value.get("n/a", []) |
| misc += value.get("Misc", []) |
        for pr_number in misc:
            if args.is_pr_with_link:
                pr_number_str = f"[#{pr_number}](https://github.com/{user}/{repo}/pull/{pr_number})"
            else:
                pr_number_str = f"#{pr_number}"
            output += f" * {pr_number_str} - {pr_title(pr_number, '[' + key + ']')}\n"
| |
        for subheading, pr_numbers in value.items():
            # "n/a"/"Misc" entries were already emitted above with titles.
            if subheading in ("DO NOT INCLUDE", "n/a", "Misc"):
                continue
            output += f" * {subheading} - " + ", ".join(f"#{n}" for n in pr_numbers) + "\n"
| |
| output += "\n" |
| |
| # 5. Print markdown-format output |
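    # Illustrative shape of the generated markdown (numbers/titles made up):
    #
    #   ### Hexagon
    #    * #12345 - Fix allocation alignment
    #    * Improve docs - #12346, #12347
    #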
| print(output) |