[FLINK-33482] Flink benchmark regression check in the machines hosted on Aliyun (#81)
diff --git a/jenkinsfiles/regression-check.jenkinsfile b/jenkinsfiles/regression-check.jenkinsfile
index b769a1f..1cbde9f 100644
--- a/jenkinsfiles/regression-check.jenkinsfile
+++ b/jenkinsfiles/regression-check.jenkinsfile
@@ -18,10 +18,10 @@
timestamps {
try {
timeout(time: 3, unit: 'HOURS') { // includes waiting for a machine
- node('Hetzner') {
+ node('Aliyun') {
dir('flink-benchmarks') {
git url: 'https://github.com/apache/flink-benchmarks.git', branch: 'master'
- sh './regression_report_v2.py > regression-report'
+ sh 'python2 ./regression_report_v2.py > regression-report'
def alerts = readFile "regression-report"
if (alerts) {
def attachments = [
diff --git a/regression_report.py b/regression_report.py
index fdbdcd7..1ddcedd 100755
--- a/regression_report.py
+++ b/regression_report.py
@@ -25,8 +25,9 @@
import json
import re
-DEFAULT_CODESPEED_URL = 'http://codespeed.dak8s.net:8000/'
-ENVIRONMENT = 2
+DEFAULT_CODESPEED_URL = 'http://flink-speed.xyz/'
+ENVIRONMENT = 3
+ENVNAME='Aliyun'
current_date = datetime.datetime.today()
@@ -70,6 +71,37 @@
return revisions
"""
+Returns a dict executable id -> executable name
+"""
+def loadExecutableNames(codespeedUrl):
+ names = {}
+ url = codespeedUrl + 'reports'
+ f = urllib2.urlopen(url)
+ response = f.read()
+ f.close()
+ for line in response.split('\n'):
+        # Find urls like: /changes/?rev=b8e7fc387dd-ffcdbb4-1647231150&exe=1&env=Aliyun
+ # and extract rev and exe params out of it
+ reports = dict(re.findall(r'([a-z]+)=([a-z0-9\-]+)', line))
+ if "exe" in reports and "rev" in reports:
+ exe = reports["exe"]
+ name = re.findall('([A-Za-z0-9\-\ \(\)]+)\@' + ENVNAME + '\<\/td\>', line)
+ # remember only the first (latest) revision for the given executable
+ if exe not in names and len(name) > 0:
+ names[exe] = name[0]
+ return names
+
+"""
+Returns the Java version from the executable name
+"""
+def extractJavaVersion(name):
+    result = re.findall('(Java[0-9]+)', name)
+ if len(result) > 0:
+ return result[0]
+ else:
+ return "Java8"
+
+"""
Returns a dict executable -> benchmark names
"""
def loadBenchmarkNames(codespeedUrl):
diff --git a/regression_report_v2.py b/regression_report_v2.py
index f0389f4..9ce50f5 100755
--- a/regression_report_v2.py
+++ b/regression_report_v2.py
@@ -22,7 +22,11 @@
import urllib
import urllib2
+from regression_report import DEFAULT_CODESPEED_URL
+from regression_report import ENVIRONMENT
from regression_report import loadBenchmarkNames
+from regression_report import loadExecutableNames
+from regression_report import extractJavaVersion
"""
The regression detection algorithm calculates the regression ratio as the ratio of change between the current
@@ -32,7 +36,6 @@
Please refer to https://docs.google.com/document/d/1Bvzvq79Ll5yxd1UtC0YzczgFbZPAgPcN3cI0MjVkIag for more detail.
"""
-ENVIRONMENT = 2
MIN_SAMPLE_SIZE_LIMIT = 5
"""
@@ -50,16 +53,16 @@
return result, lessIsbBetter
def detectRegression(urlToBenchmark, stds, scores, baselineSize, minRegressionRatio, minInstabilityMultiplier,
- direction):
+ direction, execName):
sustainable_x = [min(scores[i - 3: i]) for i in range(3, min(len(scores), baselineSize))]
baseline_throughput = max(sustainable_x)
current_throughput = max(scores[-3:])
current_instability = stds[-1] / current_throughput
if direction * (1 - current_throughput / baseline_throughput) > max(minRegressionRatio, direction * minInstabilityMultiplier * current_instability):
- print "<%s|%s> baseline=%s current_value=%s" % (urlToBenchmark, benchmark, direction * baseline_throughput, direction * current_throughput)
+ print "<%s|%s(%s)> baseline=%s current_value=%s" % (urlToBenchmark, benchmark, extractJavaVersion(execName), direction * baseline_throughput, direction * current_throughput)
-def checkBenchmark(args, exe, benchmark):
+def checkBenchmark(args, exe, benchmark, execNames):
results, lessIsbBetter = loadHistoryData(args.codespeedUrl, exe, benchmark, args.numBaselineSamples + 3)
results = list(reversed(results))
scores = [score for (date, score, deviation, commit, branch) in results]
@@ -82,7 +85,7 @@
scores = [-1 * score for score in scores]
direction = -1
detectRegression(urlToBenchmark, stds, scores, args.numBaselineSamples, args.minRegressionRatio,
- args.minInstabilityMultiplier, direction)
+ args.minInstabilityMultiplier, direction, execNames[exe])
if __name__ == "__main__":
@@ -101,11 +104,12 @@
parser.add_argument('--min-instability-multiplier', dest='minInstabilityMultiplier', required=False,
default=2, type=float,
help="Min instability multiplier to measure deviation.")
- parser.add_argument('--codespeed-url', dest='codespeedUrl', default="http://codespeed.dak8s.net:8000/",
+ parser.add_argument('--codespeed-url', dest='codespeedUrl', default=DEFAULT_CODESPEED_URL,
help='The codespeed url.')
args = parser.parse_args()
execToBenchmarks = loadBenchmarkNames(args.codespeedUrl)
+ execNames = loadExecutableNames(args.codespeedUrl)
for exe, benchmarks in execToBenchmarks.items():
for benchmark in benchmarks:
- checkBenchmark(args, exe, benchmark)
+ checkBenchmark(args, exe, benchmark, execNames)