| #!/usr/bin/python |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| from BeautifulSoup import BeautifulSoup |
| import urllib2 |
| import xmltodict |
| import json |
| import Queue |
| from threading import Thread |
| from collections import OrderedDict |
| import itertools |
| from ascii_graph import Pyasciigraph |
| import sys |
| import argparse |
| import os |
| |
# Directory scanned for "<build number>.txt" summaries of builds that were
# already processed; created on demand.
REPORTS_DIR = "/tmp/slow-test-reports"

# Build number used as the starting point when no newer summary is found.
BUILD_NUMBER = 830

# Default number of slowest test suites / test cases that get listed.
TOP_K = 25

# Parsed xml reports of the build currently being processed. The downloader
# threads append to this list and print_report() empties it afterwards.
json_dumps = []
| |
# parallel xml report downloader
class ReportDownloader(Thread):
    """Worker thread that downloads and parses queued xml test reports.

    The worker takes report urls from the queue it was created with and
    appends every successfully parsed report to the module level
    ``json_dumps`` list. ``run`` never returns, so the thread has to be
    started as a daemon if the program is supposed to exit.
    """

    def __init__(self, q):
        """Store the queue ``q`` that supplies the report urls."""
        Thread.__init__(self)
        self.q = q

    def run(self):
        """Download and parse report urls from the queue, forever."""
        while True:
            # Blocks until the next report url is available
            link = self.q.get()
            try:
                xmlFile = urllib2.urlopen(link)
                try:
                    xmlData = xmlFile.read()
                finally:
                    xmlFile.close()
                d = xmltodict.parse(xmlData, xml_attribs=True)
                # Drop the suite's 'properties' entry; the timing reports
                # in this file never read it
                d['testsuite'].pop('properties', None)
                json_dumps.append(d)
            except Exception as e:
                # One broken report must not kill the worker; report the
                # problem and carry on with the next url
                sys.stderr.write("Skipping report " + str(link) + ": " + str(e) + "\n")
            finally:
                # Always acknowledge the item, otherwise q.join() in the
                # main thread would wait forever after a failure
                self.q.task_done()
| |
def get_links(rootUrl):
    """Return the urls of all xml files linked from the page at ``rootUrl``.

    The page is fetched and every anchor whose ``href`` ends with ``.xml``
    is turned into ``<rootUrl>/<href>``. Anchors without an ``href``
    attribute are ignored.
    """
    html_page = urllib2.urlopen(rootUrl)
    try:
        soup = BeautifulSoup(html_page)
    finally:
        html_page.close()

    # Avoid a double slash when rootUrl already ends with one
    base = rootUrl.rstrip("/")
    result = []
    for link in soup.findAll('a'):
        href = link.get('href')
        # link.get() returns None for anchors that carry no href
        if href and href.endswith('.xml'):
            result.append(base + "/" + href)

    return result
| |
def take(iterable, n=TOP_K):
    """Return a list holding at most the first ``n`` items of ``iterable``."""
    leading = itertools.islice(iterable, n)
    return list(leading)
| |
def plot_testsuite_time(json_data, top_k=TOP_K, ascii_graph=False, report_file=None):
    """Print the slowest test suites and optionally save the listing.

    json_data   -- parsed xml reports; each one has a 'testsuite' entry
                   carrying the '@name' and '@time' attributes
    top_k       -- maximum number of suites that are listed
    ascii_graph -- when true the console output is drawn with Pyasciigraph
    report_file -- when not None, path of a file that is overwritten with
                   the plain text listing
    """
    seconds_by_suite = {}
    grand_total = 0.0
    for report in json_data:
        attrs = report['testsuite']
        # Suites are keyed by the part of the name after the last dot
        short_name = attrs['@name'].rsplit(".", 1)[-1]
        # Commas are stripped from the time before it is parsed
        seconds = float(attrs['@time'].replace(',', ''))
        grand_total += seconds
        if short_name not in seconds_by_suite:
            seconds_by_suite[short_name] = seconds
        else:
            seconds_by_suite[short_name] += seconds

    # Slowest suites first, cut off after top_k entries
    ranked = list(seconds_by_suite.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    gdata = list(itertools.islice(ranked, 0, top_k))

    headline = 'Top ' + str(top_k) + ' testsuite in terms of execution time (in seconds).. [Total time: ' + str(grand_total) + ' seconds]'
    rows = [suite_name + "\t" + str(secs) for suite_name, secs in gdata]

    print('\n' + headline)
    if ascii_graph:
        for line in Pyasciigraph().graph('', gdata):
            print(line)
    else:
        for row in rows:
            print(row)

    if report_file is not None:
        with open(report_file, "w") as f:
            f.write(headline + '\n')
            f.writelines(row + "\n" for row in rows)
| |
| |
def plot_testcase_time(json_data, top_k=TOP_K, ascii_graph=False, report_file=None):
    """Print the slowest test cases and optionally append the listing.

    json_data   -- parsed xml reports; each one has a 'testsuite' entry
                   with '@name', '@time', '@tests' and, when the suite ran
                   tests, 'testcase'
    top_k       -- maximum number of test cases that are listed
    ascii_graph -- when true the console output is drawn with Pyasciigraph
    report_file -- when not None, path of a file the plain text listing is
                   appended to

    Suites that report zero tests are mentioned on the console as empty
    batches.
    """
    testcase_time = {}

    overall_time = 0.0
    for suite in json_data:
        testsuite = suite['testsuite']
        test_count = int(testsuite['@tests'])
        if test_count == 0:
            print("Empty batch detected for testsuite: " + testsuite['@name'] + " which took " + testsuite['@time'] + "s")
        if test_count <= 0:
            continue

        testcases = testsuite.get('testcase') or []
        # xmltodict yields one dict instead of a list when the suite holds
        # exactly one <testcase>; wrap it so that test case is not lost
        if isinstance(testcases, dict):
            testcases = [testcases]

        for t in testcases:
            # Entries without attributes are not parsed into dicts
            if not isinstance(t, dict):
                continue
            name = t['@classname'].rsplit(".", 1)[-1] + "_" + t['@name']
            # Commas are stripped from the time before it is parsed
            time = float(t['@time'].replace(',', ''))
            overall_time += time
            testcase_time[name] = testcase_time.get(name, 0.0) + time

    # Slowest test cases first, cut off after top_k entries
    ranked = sorted(testcase_time.items(), key=lambda kv: kv[1], reverse=True)
    gdata = take(ranked, top_k)

    header = 'Top ' + str(top_k) + ' testcases in terms of execution time (in seconds).. [Total time: ' + str(overall_time) + ' seconds]'

    print('\n' + header)
    if ascii_graph:
        graph = Pyasciigraph()
        for line in graph.graph('', gdata):
            print(line)
    else:
        for line in gdata:
            print(line[0] + "\t" + str(line[1]))

    if report_file is not None:
        # Append: the test suite listing was written to the same file before
        with open(report_file, "a") as f:
            f.write('\n' + header + '\n')
            for line in gdata:
                f.write(line[0] + "\t" + str(line[1]) + "\n")
| |
def get_latest_build_with_report(build_number):
    """Return the highest build number that has a summary in REPORTS_DIR.

    REPORTS_DIR is created when it does not exist yet. Files named
    "<number>.txt" count as summaries; ``build_number`` is returned when
    none of them has a higher number.
    """
    # Start from the caller's build number (the module default was used
    # here before, which silently ignored the argument)
    latest_report = build_number
    if not os.path.exists(REPORTS_DIR):
        os.makedirs(REPORTS_DIR)
    for entry in os.listdir(REPORTS_DIR):
        if not entry.endswith(".txt"):
            continue
        try:
            current_report = int(entry.split(".txt")[0])
        except ValueError:
            # Some other text file, not a "<build number>.txt" summary
            continue
        if current_report > latest_report:
            latest_report = current_report

    return latest_report
| |
def get_pending_report_list(last_report, precommit_url):
    """Return consecutive build numbers whose results page can be opened.

    Probing starts at ``last_report`` (inclusive) and stops at the first
    build for which the server answers with an HTTP error.

    last_report   -- first build number that is probed
    precommit_url -- url template with one placeholder for the build number
    """
    pending_reports = []
    next_report = last_report
    while True:
        try:
            response = urllib2.urlopen(precommit_url % next_report)
        except urllib2.HTTPError:
            # First build without a results page ends the scan
            break
        # Only reachability matters; release the connection right away
        response.close()
        pending_reports.append(next_report)
        next_report += 1

    return pending_reports
| |
def print_report(reportUrl, json_dump, top_k, ascii_graph, report_file=None):
    """Download all xml reports below ``reportUrl`` and print the summary.

    reportUrl   -- page that links the xml test reports of one build
    json_dump   -- when true the parsed reports are written to 'data.json'
    top_k       -- number of slowest suites / test cases to list
    ascii_graph -- when true the console output is drawn as ascii graph
    report_file -- when not None, file that receives the plain text summary
    """
    # Fetch the index page once (it used to be requested twice)
    links = get_links(reportUrl)
    # Create a queue to communicate with the worker threads
    q = Queue.Queue()
    print("\nProcessing " + str(len(links)) + " test xml reports from " + reportUrl + "..")
    # Create 8 worker threads
    for _ in range(8):
        worker = ReportDownloader(q)
        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()

    # Hand every report url to the workers
    for link in links:
        q.put(link)

    # Causes the main thread to wait for the queue to finish processing all the tasks
    q.join()

    try:
        # dump test reports in json format
        if json_dump:
            with open('data.json', 'w') as outfile:
                json.dump(json_dumps, outfile, indent=2)

        # print or plot top-k tests on console
        plot_testsuite_time(json_dumps, top_k, ascii_graph, report_file)
        plot_testcase_time(json_dumps, top_k, ascii_graph, report_file)
    finally:
        # json_dumps is shared module state; empty it even when reporting
        # failed so the next build does not start with stale reports
        del json_dumps[:]
| |
def main():
    """Parse the command line and generate the slow test report(s).

    One report is always generated, for the build given with -b or
    otherwise for the latest build that already has a summary. With -l a
    report is additionally generated for every later build whose results
    page exists.
    """
    parser = argparse.ArgumentParser(description='Program to print top-k test report for Apache Hive precommit tests')
    parser.add_argument('-b', action='store', dest='build_number', help='build number of the test run. default uses test reports from apache hive precommit test run.')
    parser.add_argument('-u', action='store', dest='report_url', help='url for the test report')
    parser.add_argument('-j', action='store_true', default=False, dest='json_dump', help='json dump of test reports')
    parser.add_argument('-k', action='store', dest='top_k', type=int, help='print top k testsuite and testcases to console')
    parser.add_argument('-a', action='store_true', default=False, dest='ascii_graph', help='ascii output of the report')
    parser.add_argument('-l', action='store_true', default=False, dest='latest_report', help='will generate all missing reports up until latest build number')
    args = parser.parse_args()

    precommit_url = "http://104.198.109.242/logs/PreCommit-HIVE-Build-%s/test-results/"
    last_report = get_latest_build_with_report(BUILD_NUMBER)

    build = last_report
    if args.build_number is not None:
        build = args.build_number

    reportUrl = precommit_url % build
    if args.report_url is not None:
        reportUrl = args.report_url

    json_dump = args.json_dump

    top_k = TOP_K
    if args.top_k is not None:
        top_k = args.top_k

    ascii_graph = args.ascii_graph

    # Summaries have to live inside REPORTS_DIR as "<build>.txt", because
    # that is where get_latest_build_with_report() looks for them
    print_report(reportUrl, json_dump, top_k, ascii_graph,
                 os.path.join(REPORTS_DIR, str(build) + ".txt"))

    if args.latest_report:
        # Probe the server for later builds only when they were asked for
        pending_reports = get_pending_report_list(last_report, precommit_url)
        for pending in pending_reports:
            pendingUrl = precommit_url % pending
            # This build was processed a moment ago; do not download it again
            if pendingUrl == reportUrl:
                continue
            print_report(pendingUrl, json_dump, top_k, ascii_graph,
                         os.path.join(REPORTS_DIR, str(pending) + ".txt"))
| |
# Run the report generator only when executed as a script, so importing this
# module does not trigger downloads.
if __name__ == "__main__":
    main()