#!/usr/bin/env python
# -*- coding: utf-8 -*-
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
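"""
Checks codespeed benchmark timelines for performance regressions by comparing
the median of the most recent samples against the median of a longer baseline
window, and prints a link for every benchmark whose change exceeds the
configured threshold.
"""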
import datetime
import urllib
import urllib2
import argparse
import json
import re
DEFAULT_CODESPEED_URL = 'http://codespeed.dak8s.net:8000/'
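# Codespeed environment id passed as the 'env' parameter in every query below.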
ENVIRONMENT = 2
current_date = datetime.datetime.today()

parser = argparse.ArgumentParser(
    description='Check codespeed benchmark results for performance regressions')
parser.add_argument('--base-line-size', dest='baseLine', required=False, default=100, type=int,
                    help='Number of samples taken as the baseline.')
parser.add_argument('--download-samples-size', dest='downloadSamples', required=False, default=200,
                    type=int,
                    help='Number of samples to download from codespeed. Not all values work.')
parser.add_argument('--recent-trend-size', dest='recentTrend', required=False, default=20, type=int,
                    help='Recent trend size that is compared against the baseline.')
parser.add_argument('--median-trend-threshold', dest='medianTrendThreshold', required=False,
                    default=-4, type=float,
                    help='Tolerated change between the baseline median and the recent trend '
                         'median, in percent.')
parser.add_argument('--dev-ratio-threshold', dest='devRatioThreshold', required=False, default=5,
                    type=float)
parser.add_argument('--codespeed', dest='codespeed', default=DEFAULT_CODESPEED_URL,
                    help='codespeed url, default: %s' % DEFAULT_CODESPEED_URL)
"""
Returns a dict executable id -> revision
"""
def loadExecutableAndRevisions(codespeedUrl):
revisions = {}
url = codespeedUrl + 'reports'
f = urllib2.urlopen(url)
response = f.read()
f.close()
for line in response.split('\n'):
# Find urls like: /changes/?rev=b8e7fc387dd-ffcdbb4-1647231150&exe=1&env=Hetzner
# and extract rev and exe params out of it
reports = dict(re.findall(r'([a-z]+)=([a-z0-9\-]+)', line))
if "exe" in reports and "rev" in reports:
exe = reports["exe"]
rev = reports["rev"]
# remember only the first (latest) revision for the given executable
if exe not in revisions:
revisions[exe] = rev
return revisions
"""
Returns a dict executable -> benchmark names
"""
def loadBenchmarkNames(codespeedUrl):
execToBenchmarks = {}
revisions = loadExecutableAndRevisions(codespeedUrl)
# Per each revision/executable ask for benchmark names
# http://codespeed.dak8s.net:8000/changes/table/?tre=10&rev=b9593715f35-ffcdbb4-1647399268&exe=1&env=2
for exe, rev in revisions.items():
url = codespeedUrl + 'changes/table/?' + urllib.urlencode({'tre': '10', 'rev': rev, 'exe': exe, 'env': ENVIRONMENT})
f = urllib2.urlopen(url)
response = f.read()
f.close()
benchmarks = []
for line in response.split("\n"):
# look for lines like and extract benchmark name between characters > and <
# <td title="">remoteSortPartition</td>
if '<td title="">' in line:
benchmark = re.findall(r'\>([^<]+)\<', line)[0]
benchmarks.append(benchmark)
execToBenchmarks[exe] = benchmarks
return execToBenchmarks
"""
Returns a list of benchmark results
"""
def loadData(codespeedUrl, exe, benchmark, downloadSamples):
url = codespeedUrl + 'timeline/json/?' + urllib.urlencode({'exe': exe, 'ben': benchmark, 'env': ENVIRONMENT, 'revs': downloadSamples})
f = urllib2.urlopen(url)
response = f.read()
f.close()
timelines = json.loads(response)['timelines'][0]
result = timelines['branches']['master'][exe]
lessIsbBetter = (timelines['lessisbetter'] == " (less is better)")
return [score for (date, score, deviation, commit, branch) in result], lessIsbBetter


def getMedian(lst):
    lst = sorted(lst)
    if len(lst) % 2 == 0:
        # Even number of elements: average the two middle values.
        return (lst[len(lst) // 2] + lst[(len(lst) - 1) // 2]) / 2.0
    else:
        # Odd number of elements: take the middle value.
        return lst[len(lst) // 2]


def isThresholdReached(threshold, baselineValue, comparedValue, lessIsBetter):
    # Relative change of the compared value versus the baseline, in percent.
    ratio = 0
    if baselineValue != 0:
        ratio = comparedValue * 100.0 / baselineValue - 100
    if lessIsBetter:
        if ratio > (-1 * threshold):
            # For less-is-better metrics an increase beyond the threshold is a regression.
            return True
    elif ratio < threshold:
        # For more-is-better metrics a decrease beyond the threshold is a regression.
        return True
    return False


def checkBenchmark(args, exe, benchmark):
    results, lessIsBetter = loadData(args.codespeed, exe, benchmark, args.downloadSamples)
    urlToBenchmark = args.codespeed + 'timeline/#/?' + urllib.urlencode({
        'ben': benchmark,
        'exe': exe,
        'env': ENVIRONMENT,
        'revs': args.downloadSamples,
        'equid': 'off',
        'quarts': 'on',
        'extr': 'on'})
    if len(results) < args.baseLine:
        # Not enough samples to build a baseline.
        return
    # The first recentTrend samples form the recent trend, the remaining samples
    # up to baseLine form the baseline.
    median = getMedian(results[args.recentTrend:args.baseLine])
    recent_median = getMedian(results[:args.recentTrend])
    if isThresholdReached(args.medianTrendThreshold, median, recent_median, lessIsBetter):
        print "<%s|%s> median=%s recent_median=%s" % (urlToBenchmark, benchmark, median, recent_median)
if __name__ == "__main__":
args = parser.parse_args()
execToBenchmarks = loadBenchmarkNames(args.codespeed)
for exe, benchmarks in execToBenchmarks.items():
for benchmark in benchmarks:
checkBenchmark(args, exe, benchmark)
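
# Example invocation (the script name is illustrative; all flags are defined above):
#   python check_regressions.py --codespeed http://codespeed.dak8s.net:8000/ \
#       --base-line-size 100 --recent-trend-size 20 --median-trend-threshold -4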