| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "<!--\n", |
| "#\n", |
| "# Licensed to the Apache Software Foundation (ASF) under one or more\n", |
| "# contributor license agreements. See the NOTICE file distributed with\n", |
| "# this work for additional information regarding copyright ownership.\n", |
| "# The ASF licenses this file to You under the Apache License, Version 2.0\n", |
| "# (the \"License\"); you may not use this file except in compliance with\n", |
| "# the License. You may obtain a copy of the License at\n", |
| "#\n", |
| "# http://www.apache.org/licenses/LICENSE-2.0\n", |
| "#\n", |
| "# Unless required by applicable law or agreed to in writing, software\n", |
| "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", |
| "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", |
| "# See the License for the specific language governing permissions and\n", |
| "# limitations under the License.\n", |
| "#\n", |
| "-->\n", |
| "\n", |
| "# Precommit Job Times\n", |
| "This notebook fetches build durations and individual test results for Beam precommit jobs from Jenkins.\n", |
| "\n", |
| "## Requirements\n", |
| "\n", |
| "```shell\n", |
| "pip install pandas numpy matplotlib requests ipywidgets\n", |
| "# You may need to restart Jupyter for matplotlib to work.\n", |
| "```\n", |
| "\n", |
| "**Note:** Requests to `builds.apache.org` must contain a `?depth=` or `?tree=` argument; otherwise your IP will get banned. See the [policy](https://cwiki.apache.org/confluence/display/INFRA/Using+the+ASF+Jenkins+API)." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "slideshow": { |
| "slide_type": "-" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "import pandas as pd\n", |
| "import numpy as np\n", |
| "import matplotlib.pyplot as plt\n", |
| "import matplotlib.dates as md\n", |
| "import requests" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Fetch precommit job data from Jenkins.\n", |
| "\n", |
| "class Build(dict):\n", |
| "    \"\"\"Flat record of one Jenkins build, used as a row of the builds DataFrame.\n", |
| "\n", |
| "    Keys set: job_name, result, number, timestamp, queuingDurationMinutes,\n", |
| "    totalDurationMinutes.\n", |
| "    \"\"\"\n", |
| "\n", |
| "    def __init__(self, job_name, json):\n", |
| "        \"\"\"Populate the record from one decoded Jenkins build object.\n", |
| "\n", |
| "        Args:\n", |
| "          job_name: name of the Jenkins job the build belongs to.\n", |
| "          json: decoded build dict; must contain 'result', 'number',\n", |
| "            'timestamp' (epoch milliseconds) and 'actions'.\n", |
| "\n", |
| "        Raises:\n", |
| "          ValueError: if no TimeInQueueAction entry provided the queuing or\n", |
| "            total duration.\n", |
| "        \"\"\"\n", |
| "        self['job_name'] = job_name\n", |
| "        self['result'] = json['result']\n", |
| "        self['number'] = json['number']\n", |
| "        # Build a tz-naive UTC timestamp directly from the epoch milliseconds so it\n", |
| "        # can be compared with tz-naive cutoffs regardless of the pandas version.\n", |
| "        self['timestamp'] = pd.Timestamp(json['timestamp'], unit='ms')\n", |
| "        # Sentinels must live under the same keys that are checked below.\n", |
| "        self['queuingDurationMinutes'] = -1\n", |
| "        self['totalDurationMinutes'] = -1\n", |
| "        for action in json['actions']:\n", |
| "            if action.get('_class', None) == 'jenkins.metrics.impl.TimeInQueueAction':\n", |
| "                self['queuingDurationMinutes'] = action['queuingDurationMillis'] / 60000.\n", |
| "                self['totalDurationMinutes'] = action['totalDurationMillis'] / 60000.\n", |
| "        if self['queuingDurationMinutes'] == -1:\n", |
| "            raise ValueError('could not find queuingDurationMillis in: %s' % (json,))\n", |
| "        if self['totalDurationMinutes'] == -1:\n", |
| "            raise ValueError('could not find totalDurationMillis in: %s' % (json,))\n", |
| " \n", |
| "# Can be 'builds' (last 50) or 'allBuilds'.\n", |
| "builds_key = 'allBuilds'\n", |
| "\n", |
| "builds = []\n", |
| "job_names = ['beam_PreCommit_Java_Cron', 'beam_PreCommit_Python_Cron', 'beam_PreCommit_Go_Cron']\n", |
| "for job_name in job_names:\n", |
| "    url = 'https://builds.apache.org/job/%s/api/json' % job_name\n", |
| "    # Always send a tree= filter: the ASF Jenkins policy forbids unfiltered API requests.\n", |
| "    params = {\n", |
| "        'tree': '%s[result,number,timestamp,actions[queuingDurationMillis,totalDurationMillis]]' % builds_key}\n", |
| "    # Bound the wait so a stalled connection cannot hang the notebook.\n", |
| "    r = requests.get(url, params=params, timeout=60)\n", |
| "    # Surface HTTP errors here rather than as a confusing JSON decode failure.\n", |
| "    r.raise_for_status()\n", |
| "    data = r.json()\n", |
| "    builds.extend([Build(job_name, build_json)\n", |
| "                   for build_json in data[builds_key]])\n", |
| "\n", |
| "df = pd.DataFrame(builds)\n", |
| "\n", |
| "# Sample the clock once so all three windows share the same reference instant.\n", |
| "# The cutoff is made tz-naive (UTC) for comparison against df.timestamp.\n", |
| "now_utc = pd.Timestamp.now(tz='UTC').tz_convert(None)\n", |
| "timestamp_cutoff = now_utc - pd.Timedelta(weeks=4)\n", |
| "df_4weeks = df[df.timestamp >= timestamp_cutoff]\n", |
| "timestamp_cutoff = now_utc - pd.Timedelta(weeks=1)\n", |
| "df_1week = df[df.timestamp >= timestamp_cutoff]\n", |
| "timestamp_cutoff = now_utc - pd.Timedelta(days=1)\n", |
| "df_1day = df[df.timestamp >= timestamp_cutoff]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Graphs of precommit job times.\n", |
| "\n", |
| "plot_columns = ['timestamp', 'queuingDurationMinutes', 'totalDurationMinutes']\n", |
| "for job_name in job_names:\n", |
| "    # One figure per job: both durations plotted against the build timestamp.\n", |
| "    job_rows = df_4weeks.loc[df_4weeks.job_name == job_name, plot_columns]\n", |
| "    ax = job_rows.plot(x='timestamp')\n", |
| "    ax.set_title(job_name)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Get 95th percentile of precommit run times.\n", |
| "test_dfs = {'4 weeks': df_4weeks, '1 week': df_1week, '1 day': df_1day}\n", |
| "metrics = []\n", |
| "\n", |
| "for sample_time, test_df in test_dfs.items():\n", |
| "    for job_name in job_names:\n", |
| "        df_times = test_df[test_df.job_name == job_name]\n", |
| "        df_success = df_times[df_times.result == 'SUCCESS']\n", |
| "        for percentile in [95]:\n", |
| "            # Series.quantile yields NaN for an empty selection (e.g. no successful\n", |
| "            # build inside the window), whereas np.percentile can raise on empty\n", |
| "            # input with older NumPy releases.\n", |
| "            fraction = percentile / 100.\n", |
| "            total_all = df_times.totalDurationMinutes.quantile(fraction)\n", |
| "            total_success = df_success.totalDurationMinutes.quantile(fraction)\n", |
| "            queue = df_times.queuingDurationMinutes.quantile(fraction)\n", |
| "            metrics.append({'job_name': '%s %s %dth' % (\n", |
| "                                job_name.replace('beam_PreCommit_','').replace('_GradleBuild',''),\n", |
| "                                sample_time, percentile),\n", |
| "                            'totalDurationMinutes_all': total_all,\n", |
| "                            'totalDurationMinutes_success_only': total_success,\n", |
| "                            'queuingDurationMinutes': queue,\n", |
| "                           })\n", |
| "\n", |
| "# Last expression of the cell: rendered as the summary table.\n", |
| "pd.DataFrame(metrics).sort_values('job_name')" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Fetch individual test data (precommit) from Jenkins.\n", |
| "MAX_FETCH_PER_JOB_TYPE = 5\n", |
| "\n", |
| "test_results_raw = []\n", |
| "for job_name in df.job_name.unique():\n", |
| "    if job_name == 'beam_PreCommit_Go_Cron':\n", |
| "        # TODO: Go builds are missing testReport data on Jenkins.\n", |
| "        continue\n", |
| "    num_fetched = 0\n", |
| "    for build_num in df.number[df.job_name == job_name].unique():\n", |
| "        # The depth=1 argument keeps the request within the ASF Jenkins API policy.\n", |
| "        url = 'https://builds.apache.org/job/%s/%s/testReport/api/json?depth=1' % (job_name, build_num)\n", |
| "        print('.', end='')\n", |
| "        r = requests.get(url)\n", |
| "        if r.ok:\n", |
| "            # Tag the report with its origin so later cells can attribute each test case.\n", |
| "            report = r.json()\n", |
| "            report['job_name'] = job_name\n", |
| "            report['build_num'] = build_num\n", |
| "            test_results_raw.append(report)\n", |
| "\n", |
| "            # Only successfully fetched reports count towards the per-job limit.\n", |
| "            num_fetched += 1\n", |
| "            if num_fetched >= MAX_FETCH_PER_JOB_TYPE:\n", |
| "                break\n", |
| "        else:\n", |
| "            # Typically a 404 means that the job is still running.\n", |
| "            print('skipping (%s): %s' % (r.status_code, url))\n", |
| "\n", |
| "print(' done')" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Analyze individual test results.\n", |
| "\n", |
| "class TestResult(dict):\n", |
| "    \"\"\"Flat record of a single test case taken from a Jenkins test report.\"\"\"\n", |
| "\n", |
| "    def __init__(self, job_name, build_num, json):\n", |
| "        \"\"\"Copy the identifying fields of one test case into this dict.\n", |
| "\n", |
| "        Args:\n", |
| "          job_name: name of the Jenkins job that ran the test.\n", |
| "          build_num: number of the build the test case belongs to.\n", |
| "          json: decoded test-case dict; must contain 'name', 'duration',\n", |
| "            'className' and 'status' (a missing key raises KeyError).\n", |
| "        \"\"\"\n", |
| "        self['job_name'] = job_name\n", |
| "        self['build_num'] = build_num\n", |
| "        # Remaining fields are taken verbatim from the test-case JSON.\n", |
| "        for field in ('name', 'duration', 'className', 'status'):\n", |
| "            self[field] = json[field]\n", |
| "\n", |
| "# Flatten every suite of every fetched report into one TestResult per test case.\n", |
| "test_results = [\n", |
| "    TestResult(report['job_name'], report['build_num'], case)\n", |
| "    for report in test_results_raw\n", |
| "    for suite in report['suites']\n", |
| "    for case in suite['cases']\n", |
| "]\n", |
| "\n", |
| "# Collapse builds: keep the largest duration per (class, job, test, status), slowest first.\n", |
| "df_tests = (\n", |
| "    pd.DataFrame(test_results)\n", |
| "    .drop(columns=['build_num'])\n", |
| "    .groupby(['className', 'job_name', 'name', 'status'], as_index=False)\n", |
| "    .max()\n", |
| "    .sort_values('duration', ascending=False)\n", |
| ")\n", |
| "\n", |
| "def filter_test_results(job_name, status):\n", |
| "    \"\"\"Return the first 20 rows of df_tests matching the given job and status.\n", |
| "\n", |
| "    Passing 'all' for either argument disables that filter.\n", |
| "    \"\"\"\n", |
| "    selected = df_tests\n", |
| "    for column, wanted in (('job_name', job_name), ('status', status)):\n", |
| "        if wanted != 'all':\n", |
| "            selected = selected[selected[column] == wanted]\n", |
| "    return selected.head(n=20)\n", |
| "\n", |
| "# ipywidgets is only needed for this interactive filter.\n", |
| "from ipywidgets import interact\n", |
| "\n", |
| "# Offer every observed value plus an 'all' entry that disables the filter.\n", |
| "job_name_choices = ['all'] + list(df_tests.job_name.unique())\n", |
| "status_choices = ['all'] + list(df_tests.status.unique())\n", |
| "interact(filter_test_results, job_name=job_name_choices, status=status_choices)" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.5.3" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 2 |
| } |