| #!/usr/bin/env python |
| # -*- coding: utf-8 -*- |
| ################################################################################ |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| ################################################################################ |
| import argparse |
| import json |
| import urllib |
| import urllib2 |
| |
| from regression_report import loadBenchmarkNames |
| |
| """ |
The regression detection algorithm calculates the regression ratio as the ratio of change between the current
throughput and the maximum throughput observed in the most recent numBaselineSamples samples. A regression alert is
triggered if the regression ratio exceeds max(minRegressionRatio, minInstabilityMultiplier * lastStandardDeviation).
In the implementation, both throughputs are evaluated over windows of three consecutive samples, and the last
standard deviation is taken relative to the current throughput.

Please refer to https://docs.google.com/document/d/1Bvzvq79Ll5yxd1UtC0YzczgFbZPAgPcN3cI0MjVkIag for more details.
| """ |
| |
| ENVIRONMENT = 2 |
| MIN_SAMPLE_SIZE_LIMIT = 5 |
| |
| """ |
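Returns the list of benchmark results recorded on the master branch, together with a flag telling whether lower
values are better for the benchmark.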
| """ |
def loadHistoryData(codespeedUrl, exe, benchmark, baselineSize):
    """
    Fetches the recent history of one benchmark from the codespeed timeline endpoint.

    :param codespeedUrl: base URL of the codespeed instance; 'timeline/json/?' is appended to it as-is
    :param exe: executable identifier, sent as the 'exe' query parameter and used as the key into the
                per-branch results of the response
    :param benchmark: benchmark name, sent as the 'ben' query parameter
    :param baselineSize: number of revisions to request, sent as the 'revs' query parameter
    :return: a pair (results, lessIsBetter), where results is whatever the response stores under
             timelines[0]['branches']['master'][exe] and lessIsBetter is True when the response marks
             the benchmark with " (less is better)"
    :raises IndexError, KeyError: if the response contains no timeline, no 'master' branch or no entry
                                  for exe (NOTE(review): callers do not catch these - confirm that is intended)
    """
    query = urllib.urlencode({'exe': exe, 'ben': benchmark, 'env': ENVIRONMENT, 'revs': baselineSize})
    url = codespeedUrl + 'timeline/json/?' + query
    f = urllib2.urlopen(url)
    try:
        response = f.read()
    finally:
        # Release the connection even when reading the body fails.
        f.close()
    # Only the first timeline of the response is used.
    timelines = json.loads(response)['timelines'][0]
    result = timelines['branches']['master'][exe]
    lessIsBetter = (timelines['lessisbetter'] == " (less is better)")
    return result, lessIsBetter
| |
def detectRegression(urlToBenchmark, stds, scores, baselineSize, minRegressionRatio, minInstabilityMultiplier,
                     direction, benchmark=None):
    """
    Prints an alert line when the most recent scores regressed compared to the baseline.

    The baseline is the best "sustainable" score, i.e. the maximum over the minimums of every window of
    three consecutive samples ending before index baselineSize. The current score is the maximum of the
    last three samples. An alert is printed when the relative change between the two exceeds
    max(minRegressionRatio, minInstabilityMultiplier * relative deviation of the last sample).

    :param urlToBenchmark: link printed in the alert line
    :param stds: standard deviations of the samples; only the last one is used
    :param scores: scores of the samples, the last entries being treated as the current ones; the caller
                   is expected to have negated them when lower values are better
    :param baselineSize: exclusive upper bound for the end index of the baseline windows
    :param minRegressionRatio: minimum relative change that may trigger an alert
    :param minInstabilityMultiplier: multiplier applied to the relative deviation of the last sample
    :param direction: 1 when higher scores are better, -1 when scores were negated by the caller
    :param benchmark: name printed as the link label; falls back to urlToBenchmark when omitted
    :return: True if an alert was printed, False otherwise
    """
    # Windows starting past the end of the history would be empty and make min() raise, so stop at the
    # last window that still contains at least one sample.
    lastWindowEnd = min(baselineSize, len(scores) + 3)
    sustainable_x = [min(scores[i - 3: i]) for i in range(3, lastWindowEnd)]
    if not sustainable_x:
        # Not enough history to build a baseline.
        return False

    baseline_throughput = max(sustainable_x)
    current_throughput = max(scores[-3:])
    if baseline_throughput == 0:
        # The relative change against a zero baseline is undefined.
        return False

    # float() guards against integer division truncating the ratios under Python 2.
    if current_throughput == 0:
        # The relative deviation is undefined here; rely on minRegressionRatio alone.
        current_instability = 0.0
    else:
        current_instability = float(stds[-1]) / current_throughput

    regression_ratio = direction * (1 - float(current_throughput) / baseline_throughput)
    threshold = max(minRegressionRatio, direction * minInstabilityMultiplier * current_instability)
    if regression_ratio <= threshold:
        return False

    label = benchmark if benchmark is not None else urlToBenchmark
    # Multiplying by direction restores the original sign of scores negated by the caller.
    print("<%s|%s> baseline=%s current_value=%s" % (
        urlToBenchmark, label, direction * baseline_throughput, direction * current_throughput))
    return True


def checkBenchmark(args, exe, benchmark):
    """
    Loads the history of one benchmark and reports a regression, if any.

    :param args: parsed command line arguments; codespeedUrl, numBaselineSamples, numDisplaySamples,
                 minRegressionRatio and minInstabilityMultiplier are read
    :param exe: executable identifier the benchmark belongs to
    :param benchmark: name of the benchmark to check
    """
    # Three samples more than the baseline size are requested.
    results, lessIsBetter = loadHistoryData(args.codespeedUrl, exe, benchmark, args.numBaselineSamples + 3)
    if len(results) < MIN_SAMPLE_SIZE_LIMIT:
        return

    # After reversing, the row returned first by codespeed comes last, which is where detectRegression
    # looks for the current samples.
    results = list(reversed(results))
    scores = [score for (_date, score, _deviation, _commit, _branch) in results]
    stds = [deviation for (_date, _score, deviation, _commit, _branch) in results]

    urlToBenchmark = args.codespeedUrl + 'timeline/#/?' + urllib.urlencode({
        'ben': benchmark,
        'exe': exe,
        'env': ENVIRONMENT,
        'revs': args.numDisplaySamples,
        'equid': 'off',
        'quarts': 'on',
        'extr': 'on'})

    direction = 1
    if lessIsBetter:
        # Negate the scores so that "higher is better" holds for every benchmark.
        scores = [-1 * score for score in scores]
        direction = -1
    detectRegression(urlToBenchmark, stds, scores, args.numBaselineSamples, args.minRegressionRatio,
                     args.minInstabilityMultiplier, direction, benchmark)
| |
| |
if __name__ == "__main__":
    # Command line options as (flag, add_argument keyword arguments) pairs, registered in this order.
    optionSpecs = [
        ('--num-baseline-samples', {
            'dest': 'numBaselineSamples',
            'required': False,
            'default': 30,
            'type': int,
            'help': 'The maximum number of recent samples across which the maximum achieved throughput would be '
                    'used as the baseline for regression detection.'}),
        ('--num-display-samples', {
            'dest': 'numDisplaySamples',
            'required': False,
            'default': 200,
            'type': int,
            'help': 'Number of samples to display in regression report for human inspection. Not all values '
                    'are working.'}),
        ('--min-regression-ratio', {
            'dest': 'minRegressionRatio',
            'required': False,
            'default': 0.04,
            'type': float,
            'help': 'A regression should be alerted only if the ratio of change between the baseline '
                    'throughput and the current throughput exceeds the configured value.'}),
        ('--min-instability-multiplier', {
            'dest': 'minInstabilityMultiplier',
            'required': False,
            'default': 2,
            'type': float,
            'help': "Min instability multiplier to measure deviation."}),
        ('--codespeed-url', {
            'dest': 'codespeedUrl',
            'default': "http://codespeed.dak8s.net:8000/",
            'help': 'The codespeed url.'}),
    ]

    parser = argparse.ArgumentParser(description='Regression report based on Max/Min value')
    for flag, spec in optionSpecs:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    # Check every benchmark of every executable known to the codespeed instance.
    # NOTE: args, exe and benchmark are module-level globals when run as a script; keep their names stable.
    execToBenchmarks = loadBenchmarkNames(args.codespeedUrl)
    for exe, benchmarks in execToBenchmarks.items():
        for benchmark in benchmarks:
            checkBenchmark(args, exe, benchmark)