blob: 1bfbeda0f70614849a0afb1c10d84232f6dae277 [file] [log] [blame]
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<!--\n",
"#\n",
"# Licensed to the Apache Software Foundation (ASF) under one or more\n",
"# contributor license agreements. See the NOTICE file distributed with\n",
"# this work for additional information regarding copyright ownership.\n",
"# The ASF licenses this file to You under the Apache License, Version 2.0\n",
"# (the \"License\"); you may not use this file except in compliance with\n",
"# the License. You may obtain a copy of the License at\n",
"#\n",
"# http://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License.\n",
"#\n",
"-->\n",
"\n",
"# Precommit Job Times\n",
"This notebook fetches test statistics from Jenkins.\n",
"\n",
"## Requirements\n",
"\n",
"```shell\n",
"pip install pandas matplotlib requests\n",
"# You may need to restart Jupyter for matplotlib to work.\n",
"```\n",
"\n",
"**Note:** Requests to `builds.apache.org` must contain a ?depth= or ?tree= argument, otherwise your IP will get banned. [Policy](https://cwiki.apache.org/confluence/display/INFRA/Using+the+ASF+Jenkins+API)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.dates as md\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fetch precommit job data from Jenkins.\n",
"\n",
"class Build(dict):\n",
"    \"\"\"Flattened summary of one Jenkins build, suitable for pd.DataFrame rows.\n",
"\n",
"    Args:\n",
"      job_name: Jenkins job name, e.g. 'beam_PreCommit_Java_Cron'.\n",
"      json: one entry of the job API's 'builds'/'allBuilds' list.\n",
"\n",
"    Raises:\n",
"      ValueError: if the build JSON has no TimeInQueueAction with timing data.\n",
"    \"\"\"\n",
"    def __init__(self, job_name, json):\n",
"        self['job_name'] = job_name\n",
"        self['result'] = json['result']\n",
"        self['number'] = json['number']\n",
"        # Jenkins reports timestamps as epoch milliseconds.\n",
"        self['timestamp'] = pd.Timestamp.utcfromtimestamp(json['timestamp'] / 1000)\n",
"        # Sentinels so a missing TimeInQueueAction is detected below.\n",
"        # Bug fix: the sentinels used to be stored under *Millis keys while the\n",
"        # checks read *Minutes keys, so a missing action raised KeyError instead\n",
"        # of the intended ValueError.\n",
"        self['queuingDurationMinutes'] = -1\n",
"        self['totalDurationMinutes'] = -1\n",
"        for action in json['actions']:\n",
"            if action.get('_class', None) == 'jenkins.metrics.impl.TimeInQueueAction':\n",
"                self['queuingDurationMinutes'] = action['queuingDurationMillis'] / 60000.\n",
"                self['totalDurationMinutes'] = action['totalDurationMillis'] / 60000.\n",
"        # Bug fix: format the message with %, rather than passing the dict as a\n",
"        # second exception argument.\n",
"        if self['queuingDurationMinutes'] == -1:\n",
"            raise ValueError('could not find queuingDurationMillis in: %s' % json)\n",
"        if self['totalDurationMinutes'] == -1:\n",
"            raise ValueError('could not find totalDurationMillis in: %s' % json)\n",
" \n",
"# Can be 'builds' (last 50) or 'allBuilds'.\n",
"builds_key = 'allBuilds'\n",
"\n",
"# Pull the build list for each precommit cron job and flatten into one frame.\n",
"builds = []\n",
"job_names = ['beam_PreCommit_Java_Cron', 'beam_PreCommit_Python_Cron', 'beam_PreCommit_Go_Cron']\n",
"for job_name in job_names:\n",
"    url = 'https://builds.apache.org/job/%s/api/json' % job_name\n",
"    # The ?tree= filter keeps the response small (and ASF infra requires a\n",
"    # depth/tree argument -- see the note at the top of the notebook).\n",
"    params = {\n",
"        'tree': '%s[result,number,timestamp,actions[queuingDurationMillis,totalDurationMillis]]' % builds_key}\n",
"    response = requests.get(url, params=params)\n",
"    for build_json in response.json()[builds_key]:\n",
"        builds.append(Build(job_name, build_json))\n",
"\n",
"df = pd.DataFrame(builds)\n",
"\n",
"def _since(delta):\n",
"    \"\"\"Rows of df whose timestamp falls within `delta` of now (naive UTC).\"\"\"\n",
"    cutoff = pd.Timestamp.utcnow().tz_convert(None) - delta\n",
"    return df[df.timestamp >= cutoff]\n",
"\n",
"df_4weeks = _since(pd.Timedelta(weeks=4))\n",
"df_1week = _since(pd.Timedelta(weeks=1))\n",
"df_1day = _since(pd.Timedelta(days=1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot queuing and total duration over the last four weeks, one chart per job.\n",
"\n",
"for name in job_names:\n",
"    job_times = df_4weeks.loc[\n",
"        df_4weeks.job_name == name,\n",
"        ['timestamp', 'queuingDurationMinutes', 'totalDurationMinutes']]\n",
"    job_times.plot(x='timestamp').set_title(name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 95th percentile of precommit run times, per job, over several time windows.\n",
"test_dfs = {'4 weeks': df_4weeks, '1 week': df_1week, '1 day': df_1day}\n",
"metrics = []\n",
"\n",
"for sample_time, test_df in test_dfs.items():\n",
"    for job_name in job_names:\n",
"        job_rows = test_df[test_df.job_name == job_name]\n",
"        success_rows = job_rows[job_rows.result == 'SUCCESS']\n",
"        short_name = job_name.replace('beam_PreCommit_', '').replace('_GradleBuild', '')\n",
"        for percentile in [95]:\n",
"            metrics.append({\n",
"                'job_name': '%s %s %dth' % (short_name, sample_time, percentile),\n",
"                'totalDurationMinutes_all': np.percentile(\n",
"                    job_rows.totalDurationMinutes, q=percentile),\n",
"                'totalDurationMinutes_success_only': np.percentile(\n",
"                    success_rows.totalDurationMinutes, q=percentile),\n",
"                'queuingDurationMinutes': np.percentile(\n",
"                    job_rows.queuingDurationMinutes, q=percentile),\n",
"            })\n",
"\n",
"pd.DataFrame(metrics).sort_values('job_name')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download per-test reports (precommit) from Jenkins.\n",
"MAX_FETCH_PER_JOB_TYPE = 5\n",
"\n",
"test_results_raw = []\n",
"for job_name in list(df.job_name.unique()):\n",
"    if job_name == 'beam_PreCommit_Go_Cron':\n",
"        # TODO: Go builds are missing testReport data on Jenkins.\n",
"        continue\n",
"    num_fetched = 0\n",
"    for build_num in list(df.number[df.job_name == job_name].unique()):\n",
"        url = 'https://builds.apache.org/job/%s/%s/testReport/api/json?depth=1' % (job_name, build_num)\n",
"        print('.', end='')\n",
"        response = requests.get(url)\n",
"        if not response.ok:\n",
"            # Typically a 404 means that the job is still running.\n",
"            print('skipping (%s): %s' % (response.status_code, url))\n",
"            continue\n",
"        report = response.json()\n",
"        report['job_name'] = job_name\n",
"        report['build_num'] = build_num\n",
"        test_results_raw.append(report)\n",
"\n",
"        num_fetched += 1\n",
"        if num_fetched >= MAX_FETCH_PER_JOB_TYPE:\n",
"            break\n",
"\n",
"print(' done')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Analyze individual test results.\n",
"\n",
"class TestResult(dict):\n",
"    \"\"\"One test case result, flattened so rows can feed pd.DataFrame.\"\"\"\n",
"\n",
"    def __init__(self, job_name, build_num, json):\n",
"        self['job_name'] = job_name\n",
"        self['build_num'] = build_num\n",
"        # Copy the fields of interest from the raw Jenkins case JSON.\n",
"        for field in ('name', 'duration', 'className', 'status'):\n",
"            self[field] = json[field]\n",
"\n",
"# Flatten every suite's cases into one row per test.\n",
"test_results = []\n",
"for raw in test_results_raw:\n",
"    for suite in raw['suites']:\n",
"        test_results.extend(\n",
"            TestResult(raw['job_name'], raw['build_num'], case)\n",
"            for case in suite['cases'])\n",
"\n",
"# Keep the slowest observation of each (class, job, test, status) combination.\n",
"df_tests = (pd.DataFrame(test_results)\n",
"            .drop(columns=['build_num'])\n",
"            .groupby(['className', 'job_name', 'name', 'status'], as_index=False)\n",
"            .max()\n",
"            .sort_values('duration', ascending=False))\n",
"\n",
"def filter_test_results(job_name, status):\n",
"    \"\"\"Return the 20 slowest tests matching the selected job and status.\"\"\"\n",
"    selected = df_tests\n",
"    if job_name != 'all':\n",
"        selected = selected[selected.job_name == job_name]\n",
"    if status != 'all':\n",
"        selected = selected[selected.status == status]\n",
"    return selected.head(n=20)\n",
"\n",
"from ipywidgets import interact\n",
"interact(filter_test_results,\n",
"         job_name=['all'] + list(df_tests.job_name.unique()),\n",
"         status=['all'] + list(df_tests.status.unique()))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}