[FLINK-33482] Flink benchmark regression check in the machines hosted on Aliyun (#81)

diff --git a/jenkinsfiles/regression-check.jenkinsfile b/jenkinsfiles/regression-check.jenkinsfile
index b769a1f..1cbde9f 100644
--- a/jenkinsfiles/regression-check.jenkinsfile
+++ b/jenkinsfiles/regression-check.jenkinsfile
@@ -18,10 +18,10 @@
 timestamps {
     try {
         timeout(time: 3, unit: 'HOURS') { // includes waiting for a machine
-            node('Hetzner') {
+            node('Aliyun') {
                 dir('flink-benchmarks') {
                     git url: 'https://github.com/apache/flink-benchmarks.git', branch: 'master'
-                    sh './regression_report_v2.py > regression-report'
+                    sh 'python2 ./regression_report_v2.py > regression-report'
                     def alerts = readFile "regression-report"
                     if (alerts) {
                          def attachments = [
diff --git a/regression_report.py b/regression_report.py
index fdbdcd7..1ddcedd 100755
--- a/regression_report.py
+++ b/regression_report.py
@@ -25,8 +25,9 @@
 import json
 import re
 
-DEFAULT_CODESPEED_URL = 'http://codespeed.dak8s.net:8000/'
-ENVIRONMENT = 2
+DEFAULT_CODESPEED_URL = 'http://flink-speed.xyz/'
+ENVIRONMENT = 3
+ENVNAME = 'Aliyun'
 
 current_date = datetime.datetime.today()
 
@@ -70,6 +71,37 @@
     return revisions
 
 """
+Returns a dict executable id -> executable name
+"""
+def loadExecutableNames(codespeedUrl):
+    names = {}
+    url = codespeedUrl + 'reports'
+    f = urllib2.urlopen(url)
+    response = f.read()
+    f.close()
+    for line in response.split('\n'):
+        # Find urls like: /changes/?rev=b8e7fc387dd-ffcdbb4-1647231150&exe=1&env=Aliyun
+        # and extract rev and exe params out of it
+        reports = dict(re.findall(r'([a-z]+)=([a-z0-9\-]+)', line))
+        if "exe" in reports and "rev" in reports:
+            exe = reports["exe"]
+            name = re.findall(r'([A-Za-z0-9\- ()]+)@' + ENVNAME + r'</td>', line)
+            # remember only the first (latest) revision for the given executable
+            if exe not in names and len(name) > 0:
+                names[exe] = name[0]
+    return names
+
+"""
+Returns the Java version from the executable name
+"""
+def extractJavaVersion(name):
+    result = re.findall('(Java[0-9]+)', name)
+    if len(result) > 0:
+        return result[0]
+    else:
+        return "Java8"
+
+"""
 Returns a dict executable -> benchmark names 
 """
 def loadBenchmarkNames(codespeedUrl):
diff --git a/regression_report_v2.py b/regression_report_v2.py
index f0389f4..9ce50f5 100755
--- a/regression_report_v2.py
+++ b/regression_report_v2.py
@@ -22,7 +22,11 @@
 import urllib
 import urllib2
 
+from regression_report import DEFAULT_CODESPEED_URL
+from regression_report import ENVIRONMENT
 from regression_report import loadBenchmarkNames
+from regression_report import loadExecutableNames
+from regression_report import extractJavaVersion
 
 """
 The regression detection algorithm calculates the regression ratio as the ratio of change between the current
@@ -32,7 +36,6 @@
 Please refer to https://docs.google.com/document/d/1Bvzvq79Ll5yxd1UtC0YzczgFbZPAgPcN3cI0MjVkIag for more detail.
 """
 
-ENVIRONMENT = 2
 MIN_SAMPLE_SIZE_LIMIT = 5
 
 """
@@ -50,16 +53,16 @@
     return result, lessIsbBetter
 
 def detectRegression(urlToBenchmark, stds, scores, baselineSize, minRegressionRatio, minInstabilityMultiplier,
-                     direction):
+                     direction, execName):
 
     sustainable_x = [min(scores[i - 3: i]) for i in range(3, min(len(scores), baselineSize))]
     baseline_throughput = max(sustainable_x)
     current_throughput = max(scores[-3:])
     current_instability = stds[-1] / current_throughput
     if direction * (1 - current_throughput / baseline_throughput) > max(minRegressionRatio,  direction * minInstabilityMultiplier * current_instability):
-        print "<%s|%s> baseline=%s current_value=%s" % (urlToBenchmark, benchmark, direction * baseline_throughput, direction * current_throughput)
+        print "<%s|%s(%s)> baseline=%s current_value=%s" % (urlToBenchmark, benchmark, extractJavaVersion(execName), direction * baseline_throughput, direction * current_throughput)
 
-def checkBenchmark(args, exe, benchmark):
+def checkBenchmark(args, exe, benchmark, execNames):
     results, lessIsbBetter = loadHistoryData(args.codespeedUrl, exe, benchmark, args.numBaselineSamples + 3)
     results = list(reversed(results))
     scores = [score for (date, score, deviation, commit, branch) in results]
@@ -82,7 +85,7 @@
         scores = [-1 * score for score in scores]
         direction = -1
     detectRegression(urlToBenchmark, stds, scores, args.numBaselineSamples, args.minRegressionRatio,
-                     args.minInstabilityMultiplier, direction)
+                     args.minInstabilityMultiplier, direction, execNames[exe])
 
 
 if __name__ == "__main__":
@@ -101,11 +104,12 @@
     parser.add_argument('--min-instability-multiplier', dest='minInstabilityMultiplier', required=False,
                         default=2, type=float,
                         help="Min instability multiplier to measure deviation.")
-    parser.add_argument('--codespeed-url', dest='codespeedUrl', default="http://codespeed.dak8s.net:8000/",
+    parser.add_argument('--codespeed-url', dest='codespeedUrl', default=DEFAULT_CODESPEED_URL,
                         help='The codespeed url.')
 
     args = parser.parse_args()
     execToBenchmarks = loadBenchmarkNames(args.codespeedUrl)
+    execNames = loadExecutableNames(args.codespeedUrl)
     for exe, benchmarks in execToBenchmarks.items():
         for benchmark in benchmarks:
-            checkBenchmark(args, exe, benchmark)
+            checkBenchmark(args, exe, benchmark, execNames)