# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import os
import sys
import inspect
#validator for titles
def validate_title(title: str) -> bool:
    """Return True if *title* contains exactly one recognised tag.

    A tag is either a bracketed issue key such as ``[HUDI-1234]`` (one or
    more digits) or the literal ``[MINOR]``. Matching is case-sensitive and
    the tag may appear anywhere in the title. Zero tags, or more than one
    tag of any kind, makes the title invalid.
    """
    # Raw string: "\[" and "\-" are invalid escape sequences in a plain
    # string literal and raise a SyntaxWarning on current Python versions.
    return len(re.findall(r'(\[HUDI\-[0-9]{1,}\]|\[MINOR\])', title)) == 1
#runs an individual title test
# name: str - the name of the test
# title: str - the title to test
# isTrue: bool - is the title valid
# bool - True if the test passed, False if it failed
def run_title_test(name: str, title: str, isTrue: bool):
    """Check one title against its expected validity and print the result.

    Args:
        name: label printed alongside the PASSED/FAILED marker.
        title: the title handed to ``validate_title``.
        isTrue: whether the title is expected to be valid.

    Returns:
        True if the validator agreed with ``isTrue``, False otherwise.
    """
    if validate_title(title) == isTrue:
        print(f"{name} - PASSED")
        return True
    print(f"{name} - FAILED")
    return False
#tests for title validation
# bool - True if all tests passed, False if any tests fail
def test_title():
    """Run every title-validation case and print a summary.

    All cases are always executed (no short-circuiting) so each one prints
    its own PASSED/FAILED line.

    Returns:
        True if all cases passed, False if any case failed.
    """
    # (test name, title under test, expected validity)
    cases = [
        #test that position doesn't matter for issue
        ("issue at front", "[HUDI-1324] my fake pr", True),
        ("issue in middle", " my [HUDI-1324] fake pr", True),
        ("issue at end", " my fake pr [HUDI-1324]", True),
        #test position doesn't matter for minor
        ("minor at front", "[MINOR] my fake pr", True),
        ("minor in middle", " my [MINOR] fake pr", True),
        ("minor at end", " my fake pr [MINOR]", True),
        #test that more than 4 nums is also ok
        ("more than 4 nums in issue", "[HUDI-12345] my fake pr", True),
        #test that 1 nums is also ok
        ("1 num in issue", "[HUDI-1] my fake pr", True),
        #no nums not ok
        ("no nums in issue", "[HUDI-] my fake pr", False),
        #no brackets not ok
        ("no brackets around issue", "HUDI-1234 my fake pr", False),
        ("no brackets around minor", "MINOR my fake pr", False),
        #lowercase not ok
        ("lowercase hudi", "[hudi-1234] my fake pr", False),
        ("lowercase minor", "[minor] my fake pr", False),
        #duplicate not ok
        ("duplicate issue", "[HUDI-1324][HUDI-1324] my fake pr", False),
        ("duplicate minor", "[MINOR] my fake pr [MINOR]", False),
        #hudi and minor not ok
        ("issue and minor", "[HUDI-1324] my [MINOR]fake pr", False),
    ]

    test_return = True
    for name, title, expected in cases:
        # run the test first so a previous failure never skips a case
        test_return = run_title_test(name, title, expected) and test_return

    # Only one of the two summaries may be printed; without the else the
    # failure message was emitted even when every test passed.
    if test_return:
        print("All title tests passed")
    else:
        print("Some title tests failed")
    return test_return
#Enums for the outcome of parsing a single line
class Outcomes:
    """Possible results of parsing a single line of the PR body.

    The members are plain class-level constants compared with ``==``; only
    their distinctness matters to the code in this file.
    """

    #error was found so we should stop parsing and exit with error
    ERROR = 0

    #continue to parse the next line
    CONTINUE = 1

    #All requirements for the current section have been met so we should start
    #parsing the next section
    NEXTSECTION = 2

    #parsing has concluded successfully, exit with no error
    SUCCESS = 3
#Holds the data for a section
# name: str - name of the parse section
# identifier: str - line that signifies the start of a section
# linesAfter: set of str - default lines in the template that we ignore when
# verifying that the user filled out the section
class ParseSectionData:
    """Static description of one section of the PR template.

    Attributes:
        name: name of the parse section; used as its key elsewhere.
        identifier: line that signifies the start of the section.
        linesAfter: collection of template lines that do not count as
            user-provided content for this section.
        prevSection / nextSection: names of the neighbouring sections.
            They start empty here and are assigned from outside.
    """

    def __init__(self, name: str, identifier: str, linesAfter: set):
        self.name = name
        self.identifier = identifier
        self.linesAfter = linesAfter
        self.prevSection = ""
        self.nextSection = ""

    #returns true if line matches the identifier
    def identify(self, line: str):
        return line == self.identifier

    #returns true if user has added new text to the section
    def identifyAfter(self, line: str):
        return line not in self.linesAfter
#Special holder of data for risk level because the identifier line is modified
#by the user
class RiskLevelData(ParseSectionData):
    """Section data whose identifier line is only matched by prefix.

    The author edits the identifier line itself when filling the section
    out, so an exact comparison would never match a completed template.
    """

    def __init__(self, name: str, identifier: str, linesAfter):
        super().__init__(name=name, identifier=identifier, linesAfter=linesAfter)

    def identify(self, line: str):
        # prefix match: the rest of the line is expected to be user-modified
        prefix = self.identifier
        return line.startswith(prefix)
#Holds all the parse section data in a map
# psd: list of ParseSectionData - a list of the data for all the parse sections
class ParseSections:
    """Registry of all parse sections, keyed by section name.

    On construction every section is linked to its neighbours via
    ``prevSection`` / ``nextSection``. The first section gets the sentinel
    ``"START"`` as predecessor and the last gets ``"END"`` as successor.

    Args:
        psd: non-empty, ordered list of section-data objects. Each must
            expose ``name`` and ``identify(line)``.
    """

    def __init__(self, psd):
        self.sections = {}
        assert len(psd) > 0
        last = len(psd) - 1
        for i, section in enumerate(psd):
            # The sentinels must not be overwritten by a neighbour lookup:
            # psd[-1] would wrap to the last section and psd[len(psd)]
            # would raise IndexError.
            section.prevSection = "START" if i == 0 else psd[i - 1].name
            section.nextSection = "END" if i == last else psd[i + 1].name
            self.sections[section.name] = section

    #returns true if line is an identifier for a section other than value
    # line: str - the line that we are parsing
    # value: str - the name of the parse section that we are parsing
    def validateOthers(self, line: str, value: str):
        return any(
            name != value and section.identify(line)
            for name, section in self.sections.items()
        )

    #gets the name of the section identified in the line
    # line: str - the line that we are parsing
    # string - name of the section if the identifier is found, else None
    def getSectionName(self, line: str):
        for name, section in self.sections.items():
            if section.identify(line):
                return name
        return None

    #returns the ParseSectionData that is named name, or None if unknown
    def get(self, name):
        return self.sections.get(name)
#Main class used to parse a section
class ParseSection:
    """Validates one section of the PR body, one line at a time.

    ``validateLine`` is called for each non-empty line while this section is
    the current one and reports what to do next as an ``Outcomes`` value.

    Args:
        data: static description of this section (name, identifier,
            template lines, neighbouring section names).
        sections: registry of all sections, used to recognise identifiers
            that belong to other sections.
        debug: when True, ``error`` prints diagnostics.
    """

    def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False):
        #set to true when the sections identifier is found
        self.found = False
        #true if debug messages should be printed
        self.debug = debug
        #the data for this parse section
        self.data = data
        #data of all the parse sections
        self.sections = sections

    #prints error message if debug is set to true
    # line: the offending body line, or "" when there is none
    # lineno: its line number in the body, or "" when there is none
    # message: human readable description of the problem
    def error(self, line: str, lineno: str, message: str):
        if self.debug:
            # line number in this script of the code that reported the error
            pyline = inspect.getframeinfo(inspect.stack()[1][0]).lineno
            # Workflow-command parameters are separated from the command
            # name by a space ("::error line=..::"), not by a comma.
            print(f"::error line={pyline}::{message}")
            if lineno != "" and line != "":
                print(f"::error line={pyline}::found on line {lineno}: {line}")
            print(f"::debug::state: {self.data.name}, found: {self.found},")

    #returns the name of the next section ("END" if this is the last one)
    def nextSection(self):
        return self.data.nextSection

    #Returns true if we have already found the section identifier and line is
    #not in the default template
    def validateAfter(self, line):
        return self.found and self.data.identifyAfter(line)

    #Decides what outcome occurs when the section identifier is found
    def processIdentify(self, line, lineno):
        if self.found:
            #since we have already found the identifier, this means that we have
            #found a duplicate of the identifier
            self.error(line, lineno, f"Duplicate {self.data.name} section found")
            return Outcomes.ERROR
        self.found = True
        return Outcomes.CONTINUE

    #Builds the error message for a line that is the identifier of a
    #different section than the one currently being parsed
    def makeValidateOthersErrorMessage(self, line):
        name = self.data.name
        prevName = self.data.prevSection
        nextName = self.nextSection()
        if self.found:
            if nextName != "END" and self.sections.sections[nextName].identify(line):
                #we found the next identifier but haven't found a description
                #yet for this section
                return f"Section {name} is missing a description"
            #we found a section other than the next section
            return f"Section {name} should be followed by section {nextName}"

        #section identifier has not been found yet
        sectionFound = self.sections.getSectionName(line)
        if sectionFound is None:
            # Internal invariant broken: this method is only meant to be
            # called for lines that validateOthers() recognised. Abort
            # instead of producing a misleading message.
            print("ERROR: none found even though validateOthers returned True")
            sys.exit(1)
        if sectionFound == prevName:
            #we have not found the current section identifier but we found the
            #previous section identifier again
            return f"Duplicate {prevName} section found"
        if prevName == "START":
            return f"Section {name} should be the first section"
        if sectionFound == nextName:
            return f"Missing section {name} between {prevName} and {nextName}"
        return f"Section {name} was expected after section {prevName}"

    #Decides the outcome state by processing line
    def validateLine(self, line, lineno):
        if self.data.identify(line):
            #we have found the identifier so we should decide what to do
            return self.processIdentify(line, lineno)
        elif self.sections.validateOthers(line, self.data.name):
            #we have found the identifier for another section
            #figure out what the error is
            self.error(line, lineno, self.makeValidateOthersErrorMessage(line))
            return Outcomes.ERROR
        elif self.validateAfter(line):
            #the pr author has added new text to this section so we consider it
            #to be filled out
            if self.nextSection() == "END":
                #if next section is "END" then there are no more sections
                #to process
                return Outcomes.SUCCESS
            return Outcomes.NEXTSECTION
        return Outcomes.CONTINUE
#We do not check this section for data
#currently just used for the checklist where they just need to check boxes
class NoDataSection(ParseSection):
    """Section that is complete as soon as its identifier line is seen.

    No user-written content is required after the identifier (currently
    used for the checklist, where authors only tick boxes).
    """

    def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False):
        super().__init__(data, sections, debug)

    def processIdentify(self, line, lineno):
        """Finish the section on first sight of its identifier.

        Any non-CONTINUE outcome from the base class (e.g. a duplicate
        identifier) is passed through unchanged.
        """
        outcome = super().processIdentify(line, lineno)
        if outcome != Outcomes.CONTINUE:
            return outcome
        # nothing more to look for: move on, or finish if this is the last
        if self.nextSection() == "END":
            return Outcomes.SUCCESS
        return Outcomes.NEXTSECTION
#Class that orchestrates the validation of the entire body
class ValidateBody:
    """Orchestrates validation of a whole PR body.

    The body is walked line by line. Each non-empty line is handed to the
    parser of the current section, and the returned ``Outcomes`` value
    decides whether to continue, advance to the next section, or stop.

    Args:
        body: the body of the pr post.
        firstSection: name of the section the body must start with.
        sections: registry holding the data for all sections.
        debug: when True, section parsers print diagnostics.
    """

    def __init__(self, body: str, firstSection: str, sections: ParseSections, debug=False):
        #the body of the pr post
        self.body = body
        #true if debug messages should be printed
        self.debug = debug
        #the name of the first section of the post
        self.firstSection = firstSection
        #the current section we are processing
        self.section = None
        #ParseSections which holds the data for all the sections
        self.sections = sections

    #Updates self.section to the next section
    def nextSection(self):
        #get the name of the section to parse
        sectionName = self.firstSection
        if self.section is not None:
            sectionName = self.section.nextSection()
        #get the data for that section
        data = self.sections.get(sectionName)
        if data is None:
            # A missing section is a setup error in this script, not a
            # problem with the PR body. Abort here rather than fail later
            # with an AttributeError on None.
            print(f"ERROR with your parse section setup. Parse section {sectionName} not found")
            sys.exit(1)
        #create the section
        if data.name == "CHECKLIST":
            # the checklist only needs its header, not a description
            self.section = NoDataSection(data=data, sections=self.sections, debug=self.debug)
        else:
            self.section = ParseSection(data=data, sections=self.sections, debug=self.debug)

    #Returns true if the body complies with the validation rules else false
    def validate(self):
        #instantiate self.section since it starts null
        self.nextSection()
        #validate each line
        for lineno, line in enumerate(self.body.splitlines(), 1):
            #ignore empty lines
            if not line:
                continue
            #run the parse section validation
            o = self.section.validateLine(line, lineno)
            #decide what to do based on outcome
            if o == Outcomes.ERROR:
                return False
            elif o == Outcomes.SUCCESS:
                return True
            elif o == Outcomes.NEXTSECTION:
                self.nextSection()
        #if we get through all the lines without a success outcome, then the
        #body does not comply
        sectionName = self.section.data.name
        if self.section.nextSection() == "END":
            if self.section.found:
                self.section.error("", "", f"Section {sectionName} is missing a description")
                return False
            self.section.error("", "", f"Missing section {sectionName} at the end")
            return False
        self.section.error("","", "Please make sure you have filled out the template correctly. You can find a blank template in /.github/")
        return False
#Generate the validator for the current template.
#needs to be manually updated
def make_default_validator(body, debug=False):
    """Build the validator for the current PR template.

    The identifiers and template lines below must match the PR template
    character for character, so they need to be updated by hand whenever
    the template changes.

    Args:
        body: the PR body to validate.
        debug: when True, diagnostics are printed during validation.

    Returns:
        A ``ValidateBody`` that starts at the CHANGE_LOGS section.
    """
    changelogs = ParseSectionData("CHANGE_LOGS",
        "### Change Logs",
        {"_Describe context and summary for this change. Highlight if any code was copied._"})
    impact = ParseSectionData("IMPACT",
        "### Impact",
        {"_Describe any public API or user-facing feature change or any performance impact._"})
    # prefix-matched: the author edits the risk-level header line itself
    risklevel = RiskLevelData("RISK_LEVEL",
        "### Risk level",
        {"_If medium or high, explain what verification was done to mitigate the risks._"})
    # NOTE(review): the "[instruction]( to make" line appears to have an
    # empty link target - confirm it matches the template exactly.
    docsUpdate = ParseSectionData("DOCUMENTATION_UPDATE",
        "### Documentation Update",
        {"_Describe any necessary documentation update if there is any new feature, config, or user-facing change_",
        "- _The config description must be updated if new configs are added or the default value of the configs are changed. If not, put \"none\"._",
        "- _Any new feature or user-facing change requires updating the Hudi website. Please create a Jira ticket, attach the",
        "  ticket number here and follow the [instruction]( to make",
        "  changes to the website._"})
    # The checklist needs no description, so it has no template lines.
    checklist = ParseSectionData("CHECKLIST",
        "### Contributor's checklist",
        set())
    parseSections = ParseSections([changelogs, impact, risklevel, docsUpdate, checklist])
    return ValidateBody(body, "CHANGE_LOGS", parseSections, debug)
#takes a list of strings and returns a string of those lines separated by \n
def joinLines(lines):
    """Return the strings in *lines* concatenated with "\\n" between them."""
    newline = "\n"
    return newline.join(lines)
#runs a test for parsing the body
# name: str - the name of the test
# body: str - the body to parse
# isTrue: bool - True if the body complies with our validation rules
# debug: bool - True if we want to print debug information
def run_test(name: str, body: str, isTrue: bool, debug: bool):
    """Validate one PR body and compare the result with the expectation.

    Args:
        name: label printed alongside the PASSED/FAILED marker.
        body: the PR body to parse.
        isTrue: True if the body is expected to comply with the rules.
        debug: True to print debug information while validating.

    Returns:
        True if the validator's verdict matched ``isTrue``, else False.
    """
    verdict = make_default_validator(body, debug).validate()
    if verdict == isTrue:
        print(f"{name} - PASSED")
        return True
    print(f"{name} - FAILED")
    return False
# Given a list of sections which are lists of strings, it combines them into one
# giant string that is the body to be parsed
def build_body(sections):
    """Flatten a list of sections into one body string.

    Each section is a list of lines. Its lines are joined with newlines and
    every section, including the last, is terminated by a newline.
    """
    return "".join(joinLines(section) + "\n" for section in sections)
# Tests for validating the body of a pr. Returns true if all tests pass
def test_body():
    """Run the PR-body validation test-suite and print a summary.

    Bodies are assembled from per-section line lists. Every malformed
    variant (unfilled, duplicated, swapped or missing section) must be
    rejected and the fully filled-out body must be accepted.

    Returns:
        True if all tests passed, False if any test failed.
    """
    # NOTE(review): DEBUG_MESSAGES is a module-level flag that is not defined
    # in the portion of the file visible here - confirm it is set before
    # this function is called.

    #Create sections that we will combine to create bodies to test validation on
    template_changelogs = [
        "### Change Logs",
        "_Describe context and summary for this change. Highlight if any code was copied._",
    ]
    good_changelogs = template_changelogs.copy()
    good_changelogs[1] = "changelogs description"

    template_impact = [
        "### Impact",
        "_Describe any public API or user-facing feature change or any performance impact._",
    ]
    good_impact = template_impact.copy()
    good_impact[1] = "impact description"

    template_risklevel = [
        "### Risk level (write none, low medium or high below)",
        "_If medium or high, explain what verification was done to mitigate the risks._",
    ]
    good_risklevel = template_risklevel.copy()
    good_risklevel[1] = "none"

    # These lines must stay identical to the template lines registered in
    # make_default_validator, otherwise they count as user content.
    template_docs_update = [
        "### Documentation Update",
        "_Describe any necessary documentation update if there is any new feature, config, or user-facing change_",
        "- _The config description must be updated if new configs are added or the default value of the configs are changed. If not, put \"none\"._",
        "- _Any new feature or user-facing change requires updating the Hudi website. Please create a Jira ticket, attach the",
        "  ticket number here and follow the [instruction]( to make",
        "  changes to the website._",
    ]
    good_docs_update = template_docs_update.copy()
    good_docs_update[1] = "update docs"

    template_checklist = [
        "### Contributor's checklist",
        "- [ ] Read through [contributor's guide](",
        "- [ ] Change Logs and Impact were stated clearly",
        "- [ ] Adequate tests were added if applicable",
        "- [ ] CI passed"
    ]

    #list of sections that when combined form a valid body
    good_sections = [good_changelogs, good_impact, good_risklevel, good_docs_update, template_checklist]
    #list of sections that when combined form the template
    template_sections = [template_changelogs, template_impact, template_risklevel, template_docs_update, template_checklist]

    tests_passed = True

    #Test section not filled out
    #no need to test checklist section
    for i in range(len(good_sections)-1):
        # section i is left as the untouched template, all others are filled
        test_sections = [
            template_sections[j] if j == i else good_sections[j]
            for j in range(len(good_sections))
        ]
        tests_passed = run_test(f"template section not filled out: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed

    #Test duplicate section
    for i in range(len(good_sections)-1):
        test_sections = []
        for j, section in enumerate(good_sections):
            test_sections.append(section)
            if j == i:
                # section i appears twice in a row
                test_sections.append(section)
        tests_passed = run_test(f"duplicate section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed

    #Test out of order section
    for i in range(len(good_sections)-1):
        for k in range(i+1,len(good_sections)):
            # start from the valid order each time so that exactly the two
            # sections named in the test label are swapped
            test_sections = list(good_sections)
            test_sections[i], test_sections[k] = test_sections[k],test_sections[i]
            tests_passed = run_test(f"Swapped sections: {i}, {k}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed

    #Test missing section
    for i in range(len(good_sections)):
        test_sections = [section for j, section in enumerate(good_sections) if i != j]
        tests_passed = run_test(f"Missing Section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed

    #Test good body:
    tests_passed = run_test("good documentation", build_body(good_sections), True, DEBUG_MESSAGES) and tests_passed

    if tests_passed:
        print("All body tests passed")
    else:
        print("Some body tests failed")
    return tests_passed
if __name__ == '__main__':
if len(sys.argv) > 1:
title_tests = test_title()
body_tests = test_body()
if title_tests and body_tests:
title = os.getenv("REQUEST_TITLE")
body = os.getenv("REQUEST_BODY")
if title is None:
print("no title")
if not validate_title(title):
print("invalid title")
if body is None:
print("no pr body")
validator = make_default_validator(body,True)
if not validator.validate():