dev-support/determine-flaky-tests-hadoop.py - hadoop - Git at Google

 #!/usr/bin/env python
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # Given a jenkins test job, this script examines all runs of the job done
 # within specified period of time (number of days prior to the execution
 # time of this script), and reports all failed tests.
 #
 # The output of this script includes a section for each run that has failed
 # tests, with each failed test name listed.
 #
 # More importantly, at the end, it outputs a summary section that lists all
 # failed tests within all examined runs, indicates in how many runs each test
 # failed, and sorts the failed tests by that number of runs.
 #
 # This way, when we see failed tests in a PreCommit build, we can quickly tell
 # whether a failed test is a new failure, or whether it has failed before and
 # how often, to get an idea of whether it may just be a flaky test.
 #
 # Of course, to be 100% sure about the reason of a test failure, closer look
 # at the failed test for the specific run is necessary.
 #
 import sys
 import platform
 # sys.hexversion encodes the interpreter version as a single integer, which
 # makes the threshold comparisons below simple numeric tests.
 sysversion = sys.hexversion
 # Flipped to True further down when running on Python 3.x; load_url_data()
 # uses it to choose between urllib.request and urllib2.
 onward30 = False
 # Refuse to run on interpreters older than Python 2.6.
 if sysversion < 0x020600F0:
   sys.exit("Minimum supported python version is 2.6, the current version is " +
       "Python" + platform.python_version())

 # Exactly Python 3.0.0 final is rejected (see the message below).
 if sysversion == 0x030000F0:
   sys.exit("There is a known bug with Python" + platform.python_version() +
       ", please try a different version");

 # The URL-opening module has a different name on Python 2 and Python 3.
 if sysversion < 0x03000000:
   import urllib2
 else:
   onward30 = True
   import urllib.request

 import datetime
 import json as simplejson
 import logging
 from optparse import OptionParser
 import time

 # Configuration: defaults for the command line options (see parse_args()).
 DEFAULT_JENKINS_URL = "https://builds.apache.org"
 DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
 DEFAULT_NUM_PREVIOUS_DAYS = 14
 # -1 means "no --top limit given", i.e. summary mode stays off.
 DEFAULT_TOP_NUM_FAILED_TEST = -1

 SECONDS_PER_DAY = 86400

 # total number of runs to examine (set by find_flaky_tests())
 numRunsToExamine = 0

 # summary mode: when True, per-build and per-error log lines are suppressed
 summary_mode = False

 # total number of errors (URLs that could not be loaded)
 error_count = 0

 """ Parse arguments """
 def parse_args():
   """Parse the command line and return the resulting options object.

   Supported options (each one falls back to its DEFAULT_* constant):
     -J/--jenkins-url  base URL of the Jenkins server
     -j/--job-name     name of the job to look at
     -n/--num-days     number of days to examine
     -t/--top          summary mode, only show the top N failed tests

   Positional arguments are not accepted; they are reported through
   parser.error().
   """
   # One row per option: (flags, keyword arguments for add_option).
   option_table = (
     (("-J", "--jenkins-url"),
      dict(type="string", dest="jenkins_url", help="Jenkins URL",
           default=DEFAULT_JENKINS_URL)),
     (("-j", "--job-name"),
      dict(type="string", dest="job_name", help="Job name to look at",
           default=DEFAULT_JOB_NAME)),
     (("-n", "--num-days"),
      dict(type="int", dest="num_prev_days",
           help="Number of days to examine",
           default=DEFAULT_NUM_PREVIOUS_DAYS)),
     (("-t", "--top"),
      dict(type="int", dest="num_failed_tests",
           help="Summary Mode, only show top number of failed tests",
           default=DEFAULT_TOP_NUM_FAILED_TEST)),
   )

   parser = OptionParser()
   for flags, settings in option_table:
     parser.add_option(*flags, **settings)

   options, leftovers = parser.parse_args()
   if leftovers:
     parser.error("unexpected arguments: " + repr(leftovers))
   return options

 """ Load data from specified url """
 def load_url_data(url):
   """Fetch url and return its body parsed as JSON.

   On Python 3 the body is decoded with the charset announced in the response
   headers; when the response announces none, UTF-8 is used instead of
   failing on decode(None).  On Python 2 the response object is handed to the
   JSON parser directly.  The response is closed in both cases.

   JSON is parsed with strict=False so control characters inside strings are
   accepted.  Errors from opening the URL or parsing the body propagate to
   the caller.
   """
   if onward30:
     ourl = urllib.request.urlopen(url)
     try:
       # get_param() returns None when no charset parameter is present.
       codec = ourl.info().get_param('charset') or 'utf-8'
       content = ourl.read().decode(codec)
     finally:
       ourl.close()
     data = simplejson.loads(content, strict=False)
   else:
     ourl = urllib2.urlopen(url)
     try:
       data = simplejson.load(ourl, strict=False)
     finally:
       ourl.close()
   return data

 """ List all builds of the target project. """
 def list_builds(jenkins_url, job_name):
   """Return the build records of job_name as reported by the Jenkins JSON API.

   The request asks only for the url, result and timestamp of each build and
   the value stored under 'builds' in the response is returned.

   If the URL cannot be loaded the global error_count is incremented, the
   failure is logged (unless summary mode is on) and the original exception
   is re-raised.
   """
   global summary_mode
   # error_count is assigned below, so it must be declared global; without the
   # declaration the handler itself fails with UnboundLocalError.
   global error_count
   url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
       jenkins=jenkins_url,
       job_name=job_name)

   try:
     data = load_url_data(url)
   except Exception:
     if not summary_mode:
       logging.error("Could not fetch: %s" % url)
     error_count += 1
     raise
   return data['builds']

 """ Find the names of any tests which failed in the given build output URL. """
 def find_failing_tests(testReportApiJson, jobConsoleOutput):
   """Return the set of failed test names found in one build's test report.

   testReportApiJson is the URL of the JSON test report; jobConsoleOutput is
   only used in log messages, to point the reader at the build's console.

   A test case counts as failed when its status is REGRESSION or FAILED, or
   when it carries non-null errorDetails.  Names have the form
   "<className>.<name>".

   If the report cannot be loaded, error_count is incremented and an empty
   set is returned.  A report without 'suites', a suite without 'cases' or a
   case without 'errorDetails' is treated as having nothing to report there.
   """
   global summary_mode
   global error_count
   ret = set()
   try:
     data = load_url_data(testReportApiJson)
   except Exception:
     # Best effort: a missing or unreadable report must not stop the scan of
     # the remaining builds, but Ctrl-C / SystemExit are no longer swallowed.
     if not summary_mode:
       logging.error("    Could not open testReport, check " +
         jobConsoleOutput + " for why it was reported failed")
     error_count += 1
     return ret

   for suite in data.get('suites') or []:
     for cs in suite.get('cases') or []:
       failed_status = cs.get('status') in ('REGRESSION', 'FAILED')
       if failed_status or cs.get('errorDetails') is not None:
         ret.add(cs['className'] + "." + cs['name'])

   if not ret and not summary_mode:
     logging.info("    No failed tests in testReport, check " +
         jobConsoleOutput + " for why it was reported failed.")
   return ret

 """ Iterate runs of specfied job within num_prev_days and collect results """
 def find_flaky_tests(jenkins_url, job_name, num_prev_days):
   """Count, per test, the failed runs of job_name within num_prev_days days.

   Builds come from list_builds().  Those whose 'timestamp' (treated as
   milliseconds) is newer than the cutoff are kept, and the number kept is
   stored in the global numRunsToExamine.  Of those, every build whose
   'result' is UNSTABLE or FAILURE has its test report inspected through
   find_failing_tests().

   Returns a dict mapping each failed test name to the number of builds in
   which it failed.
   """
   global numRunsToExamine
   global summary_mode

   every_build = list_builds(jenkins_url, job_name)

   # Keep only the builds that started after the cutoff (epoch seconds).
   cutoff = int(time.time()) - SECONDS_PER_DAY * num_prev_days
   recent = []
   for build in every_build:
     if (int(build['timestamp']) / 1000) > cutoff:
       recent.append(build)

   # Of those, remember url and timestamp of the ones that did not pass.
   failed_runs = []
   for build in recent:
     if build['result'] in ('UNSTABLE', 'FAILURE'):
       failed_runs.append((build['url'], build['timestamp']))

   numRunsToExamine = len(recent)
   if not summary_mode:
     if failed_runs:
       tail = ", as listed below:\n"
     else:
       tail = "."
     logging.info("    THERE ARE " + str(len(failed_runs)) + " builds (out of "
       + str(numRunsToExamine) + ") that have failed tests in the past "
       + str(num_prev_days) + " days" + tail)

   counts = {}
   for build_url, stamp_ms in failed_runs:
     console_url = build_url + "Console"
     report_url = build_url + "testReport"
     api_url = report_url + "/api/json"

     started = datetime.datetime.fromtimestamp(float(stamp_ms) / 1000.)
     stamp_text = started.strftime('%Y-%m-%d %H:%M:%S')
     if not summary_mode:
       logging.info("===>%s" % str(report_url) + " (" + stamp_text + ")")

     for test_name in find_failing_tests(api_url, console_url):
       if not summary_mode:
         logging.info("    Failed test: %s" % test_name)
       counts[test_name] = counts.get(test_name, 0) + 1

   return counts

 def main():
   """Script entry point: report the recently failed tests of a Jenkins job.

   Sends log output to stdout, parses the command line, collects per-test
   failure counts through find_flaky_tests() and logs them, the test that
   failed in the most runs first (ties are ordered by test name).

   Giving --top turns on summary mode: only that many tests are listed
   (a value below 1 lists none) and, if any URL could not be loaded, a
   closing note says how many errors were counted.  That note is also
   printed when no failed test was collected at all.
   """
   global numRunsToExamine
   global summary_mode
   logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

   # set up logger to write to stdout
   soh = logging.StreamHandler(sys.stdout)
   soh.setLevel(logging.INFO)
   logger = logging.getLogger()
   logger.removeHandler(logger.handlers[0])
   logger.addHandler(soh)

   opts = parse_args()
   logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
       + "/job/" + opts.job_name + "")

   if opts.num_failed_tests != -1:
     summary_mode = True

   all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
       opts.num_prev_days)

   if all_failing:
     # Most failed runs first; the name breaks ties so output is repeatable.
     ranked = sorted(all_failing, key=lambda name: (-all_failing[name], name))

     if summary_mode and opts.num_failed_tests < len(ranked):
       # Clamp so that 0 or a negative value cannot slice from the end.
       top_n = max(opts.num_failed_tests, 0)
       logging.info("\nAmong " + str(numRunsToExamine) +
                    " runs examined, top " + str(top_n) +
                    " failed tests <#failedRuns: testName>:")
       ranked = ranked[:top_n]
     else:
       logging.info("\nAmong " + str(numRunsToExamine) +
                    " runs examined, all failed tests <#failedRuns: testName>:")

     # print summary section: failed tests sorted by how many times they failed
     for tn in ranked:
       logging.info("    " + str(all_failing[tn]) + ": " + tn)

   # Reached even when nothing was collected, so that fetch errors are never
   # silently dropped in summary mode.
   if summary_mode and error_count > 0:
     logging.info("\n" + str(error_count) + " errors found, you may "
                  + "re-run in non summary mode to see error details.")

 # Run the report only when this file is executed as a script.
 if __name__ == "__main__":
   main()
	#!/usr/bin/env python
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	# Given a jenkins test job, this script examines all runs of the job done
	# within specified period of time (number of days prior to the execution
	# time of this script), and reports all failed tests.
	#
	# The output of this script includes a section for each run that has failed
	# tests, with each failed test name listed.
	#
	# More importantly, at the end, it outputs a summary section that lists all
	# failed tests within all examined runs, indicates in how many runs each test
	# failed, and sorts the failed tests by that number of runs.
	#
	# This way, when we see failed tests in a PreCommit build, we can quickly tell
	# whether a failed test is a new failure, or whether it has failed before and
	# how often, to get an idea of whether it may just be a flaky test.
	#
	# Of course, to be 100% sure about the reason of a test failure, closer look
	# at the failed test for the specific run is necessary.
	#
	import sys
	import platform
	# sys.hexversion encodes the interpreter version as a single integer, which
	# makes the threshold comparisons below simple numeric tests.
	sysversion = sys.hexversion
	# Flipped to True further down when running on Python 3.x; load_url_data()
	# uses it to choose between urllib.request and urllib2.
	onward30 = False
	# Refuse to run on interpreters older than Python 2.6.
	if sysversion < 0x020600F0:
	  sys.exit("Minimum supported python version is 2.6, the current version is " +
	      "Python" + platform.python_version())

	# Exactly Python 3.0.0 final is rejected (see the message below).
	if sysversion == 0x030000F0:
	  sys.exit("There is a known bug with Python" + platform.python_version() +
	      ", please try a different version")

	# The URL-opening module has a different name on Python 2 and Python 3.
	if sysversion < 0x03000000:
	  import urllib2
	else:
	  onward30 = True
	  import urllib.request

	import datetime
	import json as simplejson
	import logging
	from optparse import OptionParser
	import time

	# Configuration: defaults for the command line options (see parse_args()).
	DEFAULT_JENKINS_URL = "https://builds.apache.org"
	DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
	DEFAULT_NUM_PREVIOUS_DAYS = 14
	# -1 means "no --top limit given", i.e. summary mode stays off.
	DEFAULT_TOP_NUM_FAILED_TEST = -1

	SECONDS_PER_DAY = 86400

	# total number of runs to examine (set by find_flaky_tests())
	numRunsToExamine = 0

	# summary mode: when True, per-build and per-error log lines are suppressed
	summary_mode = False

	# total number of errors (URLs that could not be loaded)
	error_count = 0

	""" Parse arguments """
	def parse_args():
	  """Parse the command line and return the resulting options object.

	  Supported options (each one falls back to its DEFAULT_* constant):
	    -J/--jenkins-url  base URL of the Jenkins server
	    -j/--job-name     name of the job to look at
	    -n/--num-days     number of days to examine
	    -t/--top          summary mode, only show the top N failed tests

	  Positional arguments are not accepted; they are reported through
	  parser.error().
	  """
	  # One row per option: (flags, keyword arguments for add_option).
	  option_table = (
	    (("-J", "--jenkins-url"),
	     dict(type="string", dest="jenkins_url", help="Jenkins URL",
	          default=DEFAULT_JENKINS_URL)),
	    (("-j", "--job-name"),
	     dict(type="string", dest="job_name", help="Job name to look at",
	          default=DEFAULT_JOB_NAME)),
	    (("-n", "--num-days"),
	     dict(type="int", dest="num_prev_days",
	          help="Number of days to examine",
	          default=DEFAULT_NUM_PREVIOUS_DAYS)),
	    (("-t", "--top"),
	     dict(type="int", dest="num_failed_tests",
	          help="Summary Mode, only show top number of failed tests",
	          default=DEFAULT_TOP_NUM_FAILED_TEST)),
	  )

	  parser = OptionParser()
	  for flags, settings in option_table:
	    parser.add_option(*flags, **settings)

	  options, leftovers = parser.parse_args()
	  if leftovers:
	    parser.error("unexpected arguments: " + repr(leftovers))
	  return options

	""" Load data from specified url """
	def load_url_data(url):
	  """Fetch url and return its body parsed as JSON.

	  On Python 3 the body is decoded with the charset announced in the response
	  headers; when the response announces none, UTF-8 is used instead of
	  failing on decode(None).  On Python 2 the response object is handed to the
	  JSON parser directly.  The response is closed in both cases.

	  JSON is parsed with strict=False so control characters inside strings are
	  accepted.  Errors from opening the URL or parsing the body propagate to
	  the caller.
	  """
	  if onward30:
	    ourl = urllib.request.urlopen(url)
	    try:
	      # get_param() returns None when no charset parameter is present.
	      codec = ourl.info().get_param('charset') or 'utf-8'
	      content = ourl.read().decode(codec)
	    finally:
	      ourl.close()
	    data = simplejson.loads(content, strict=False)
	  else:
	    ourl = urllib2.urlopen(url)
	    try:
	      data = simplejson.load(ourl, strict=False)
	    finally:
	      ourl.close()
	  return data

	""" List all builds of the target project. """
	def list_builds(jenkins_url, job_name):
	  """Return the build records of job_name as reported by the Jenkins JSON API.

	  The request asks only for the url, result and timestamp of each build and
	  the value stored under 'builds' in the response is returned.

	  If the URL cannot be loaded the global error_count is incremented, the
	  failure is logged (unless summary mode is on) and the original exception
	  is re-raised.
	  """
	  global summary_mode
	  # error_count is assigned below, so it must be declared global; without the
	  # declaration the handler itself fails with UnboundLocalError.
	  global error_count
	  url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
	      jenkins=jenkins_url,
	      job_name=job_name)

	  try:
	    data = load_url_data(url)
	  except Exception:
	    if not summary_mode:
	      logging.error("Could not fetch: %s" % url)
	    error_count += 1
	    raise
	  return data['builds']

	""" Find the names of any tests which failed in the given build output URL. """
	def find_failing_tests(testReportApiJson, jobConsoleOutput):
	  """Return the set of failed test names found in one build's test report.

	  testReportApiJson is the URL of the JSON test report; jobConsoleOutput is
	  only used in log messages, to point the reader at the build's console.

	  A test case counts as failed when its status is REGRESSION or FAILED, or
	  when it carries non-null errorDetails.  Names have the form
	  "<className>.<name>".

	  If the report cannot be loaded, error_count is incremented and an empty
	  set is returned.  A report without 'suites', a suite without 'cases' or a
	  case without 'errorDetails' is treated as having nothing to report there.
	  """
	  global summary_mode
	  global error_count
	  ret = set()
	  try:
	    data = load_url_data(testReportApiJson)
	  except Exception:
	    # Best effort: a missing or unreadable report must not stop the scan of
	    # the remaining builds, but Ctrl-C / SystemExit are no longer swallowed.
	    if not summary_mode:
	      logging.error("    Could not open testReport, check " +
	        jobConsoleOutput + " for why it was reported failed")
	    error_count += 1
	    return ret

	  for suite in data.get('suites') or []:
	    for cs in suite.get('cases') or []:
	      failed_status = cs.get('status') in ('REGRESSION', 'FAILED')
	      if failed_status or cs.get('errorDetails') is not None:
	        ret.add(cs['className'] + "." + cs['name'])

	  if not ret and not summary_mode:
	    logging.info("    No failed tests in testReport, check " +
	        jobConsoleOutput + " for why it was reported failed.")
	  return ret

	""" Iterate runs of specfied job within num_prev_days and collect results """
	def find_flaky_tests(jenkins_url, job_name, num_prev_days):
	  """Count, per test, the failed runs of job_name within num_prev_days days.

	  Builds come from list_builds().  Those whose 'timestamp' (treated as
	  milliseconds) is newer than the cutoff are kept, and the number kept is
	  stored in the global numRunsToExamine.  Of those, every build whose
	  'result' is UNSTABLE or FAILURE has its test report inspected through
	  find_failing_tests().

	  Returns a dict mapping each failed test name to the number of builds in
	  which it failed.
	  """
	  global numRunsToExamine
	  global summary_mode

	  every_build = list_builds(jenkins_url, job_name)

	  # Keep only the builds that started after the cutoff (epoch seconds).
	  cutoff = int(time.time()) - SECONDS_PER_DAY * num_prev_days
	  recent = []
	  for build in every_build:
	    if (int(build['timestamp']) / 1000) > cutoff:
	      recent.append(build)

	  # Of those, remember url and timestamp of the ones that did not pass.
	  failed_runs = []
	  for build in recent:
	    if build['result'] in ('UNSTABLE', 'FAILURE'):
	      failed_runs.append((build['url'], build['timestamp']))

	  numRunsToExamine = len(recent)
	  if not summary_mode:
	    if failed_runs:
	      tail = ", as listed below:\n"
	    else:
	      tail = "."
	    logging.info("    THERE ARE " + str(len(failed_runs)) + " builds (out of "
	      + str(numRunsToExamine) + ") that have failed tests in the past "
	      + str(num_prev_days) + " days" + tail)

	  counts = {}
	  for build_url, stamp_ms in failed_runs:
	    console_url = build_url + "Console"
	    report_url = build_url + "testReport"
	    api_url = report_url + "/api/json"

	    started = datetime.datetime.fromtimestamp(float(stamp_ms) / 1000.)
	    stamp_text = started.strftime('%Y-%m-%d %H:%M:%S')
	    if not summary_mode:
	      logging.info("===>%s" % str(report_url) + " (" + stamp_text + ")")

	    for test_name in find_failing_tests(api_url, console_url):
	      if not summary_mode:
	        logging.info("    Failed test: %s" % test_name)
	      counts[test_name] = counts.get(test_name, 0) + 1

	  return counts

	def main():
	  """Script entry point: report the recently failed tests of a Jenkins job.

	  Sends log output to stdout, parses the command line, collects per-test
	  failure counts through find_flaky_tests() and logs them, the test that
	  failed in the most runs first (ties are ordered by test name).

	  Giving --top turns on summary mode: only that many tests are listed
	  (a value below 1 lists none) and, if any URL could not be loaded, a
	  closing note says how many errors were counted.  That note is also
	  printed when no failed test was collected at all.
	  """
	  global numRunsToExamine
	  global summary_mode
	  logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

	  # set up logger to write to stdout
	  soh = logging.StreamHandler(sys.stdout)
	  soh.setLevel(logging.INFO)
	  logger = logging.getLogger()
	  logger.removeHandler(logger.handlers[0])
	  logger.addHandler(soh)

	  opts = parse_args()
	  logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
	      + "/job/" + opts.job_name + "")

	  if opts.num_failed_tests != -1:
	    summary_mode = True

	  all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
	      opts.num_prev_days)

	  if all_failing:
	    # Most failed runs first; the name breaks ties so output is repeatable.
	    ranked = sorted(all_failing, key=lambda name: (-all_failing[name], name))

	    if summary_mode and opts.num_failed_tests < len(ranked):
	      # Clamp so that 0 or a negative value cannot slice from the end.
	      top_n = max(opts.num_failed_tests, 0)
	      logging.info("\nAmong " + str(numRunsToExamine) +
	                   " runs examined, top " + str(top_n) +
	                   " failed tests <#failedRuns: testName>:")
	      ranked = ranked[:top_n]
	    else:
	      logging.info("\nAmong " + str(numRunsToExamine) +
	                   " runs examined, all failed tests <#failedRuns: testName>:")

	    # print summary section: failed tests sorted by how many times they failed
	    for tn in ranked:
	      logging.info("    " + str(all_failing[tn]) + ": " + tn)

	  # Reached even when nothing was collected, so that fetch errors are never
	  # silently dropped in summary mode.
	  if summary_mode and error_count > 0:
	    logging.info("\n" + str(error_count) + " errors found, you may "
	                 + "re-run in non summary mode to see error details.")

	# Run the report only when this file is executed as a script.
	if __name__ == "__main__":
	  main()