| #!/usr/bin/env python |
| # |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| # |
| # -*- Python -*- |
| """find-fix.py: produce a find/fix report for Subversion's IZ database |
| |
| For simple text summary: |
  find-fix.py [options] query-set-1.tsv YYYY-MM-DD YYYY-MM-DD
| Statistics will be printed for bugs found or fixed within the |
| time frame. |
| |
| For gnuplot presentation: |
  find-fix.py [options] query-set-1.tsv outfile
| Gnuplot provides its own way to select date ranges. |
| |
| Either way, get a query-set-1.tsv from: |
| http://subversion.tigris.org/iz-data/query-set-1.tsv (updated nightly) |
| See http://subversion.tigris.org/iz-data/README for more info on that file. |
| |
| For more usage info on this script: |
| find-fix.py --help |
| """ |
| |
_version = "$Revision$"
| |
| # |
| # This can be run over the data file found at: |
| # http://subversion.tigris.org/iz-data/query-set-1.tsv |
| # |
| |
| import getopt |
| try: |
| my_getopt = getopt.gnu_getopt |
| except AttributeError: |
| my_getopt = getopt.getopt |
| import operator |
| import os |
| import os.path |
| import pydoc |
| import re |
| try: |
| # Python >=2.6 |
| from functools import reduce |
| except ImportError: |
| # Python <2.6 |
| pass |
| import sys |
| import time |
| |
| me = os.path.basename(sys.argv[0]) |
| |
| # Long options and their usage strings; "=" means it takes an argument. |
| # To get a list suitable for getopt, just do |
| # |
| # [x[0] for x in long_opts] |
| # |
| # Make sure to sacrifice a lamb to Guido for each element of the list. |
| long_opts = [ |
| ["milestones=", """Optional, milestones NOT to report on |
| (one or more of Beta, 1.0, Post-1.0, cvs2svn-1.0, cvs2svn-opt, |
| inapplicable)"""], |
| ["update", """Optional, update the statistics first."""], |
| ["doc", """Optional, print pydocs."""], |
| ["help", """Optional, print usage (this text)."""], |
| ["verbose", """Optional, print more progress messages."""], |
| ] |
| |
verbose = 0
update = 0
| |
| DATA_FILE = "http://subversion.tigris.org/iz-data/query-set-1.tsv" |
| ONE_WEEK = 7 * 24 * 60 * 60 |
| |
_milestone_filter = []
| |
| noncore_milestone_filter = [ |
| 'Post-1.0', |
| '1.1', |
| 'cvs2svn-1.0', |
| 'cvs2svn-opt', |
| 'inapplicable', |
| 'no milestone', |
| ] |
| |
| one_point_oh_milestone_filter = noncore_milestone_filter + [] |
| |
| beta_milestone_filter = one_point_oh_milestone_filter + ['1.0'] |
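# The canned filters nest: the 1.0 filter is a copy ("+ []") of the noncore
# filter, and the beta filter additionally drops the '1.0' milestone itself.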
| |
| |
# Issue types as they appear in the TSV export; extract() keys its per-type
# counters on these strings.
_types = [
| 'DEFECT', |
| 'TASK', |
| 'FEATURE', |
| 'ENHANCEMENT', |
| 'PATCH', |
| ] |
| |
| |
| def main(): |
| """Report bug find/fix rate statistics for Subversion.""" |
| |
| global verbose |
| global update |
| global _types |
| global _milestone_filter |
| global noncore_milestone_filter |
| |
| try: |
| opts, args = my_getopt(sys.argv[1:], "", [x[0] for x in long_opts]) |
  except getopt.GetoptError as e:
| sys.stderr.write("Error: %s\n" % e.msg) |
| shortusage() |
| sys.stderr.write("%s --help for options.\n" % me) |
| sys.exit(1) |
| |
| for opt, arg in opts: |
| if opt == "--help": |
| usage() |
| sys.exit(0) |
| elif opt == "--verbose": |
| verbose = 1 |
| elif opt == "--milestones": |
| for mstone in arg.split(","): |
| if mstone == "noncore": |
| _milestone_filter = noncore_milestone_filter |
| elif mstone == "beta": |
| _milestone_filter = beta_milestone_filter |
| elif mstone == "one": |
| _milestone_filter = one_point_oh_milestone_filter |
        elif mstone[0] == '-':
          if mstone[1:] in _milestone_filter:
            spot = _milestone_filter.index(mstone[1:])
            _milestone_filter = _milestone_filter[:spot] \
                                + _milestone_filter[(spot+1):]
        else:
          # Concatenate rather than "+=": _milestone_filter may alias one of
          # the module-level filter lists, which must not be modified in place.
          _milestone_filter = _milestone_filter + [mstone]
| |
| elif opt == "--update": |
| update = 1 |
| elif opt == "--doc": |
| pydoc.doc(pydoc.importfile(sys.argv[0])) |
| sys.exit(0) |
| |
| if len(_milestone_filter) == 0: |
| _milestone_filter = noncore_milestone_filter |
| |
| if verbose: |
| sys.stderr.write("%s: Filtering out milestones %s.\n" |
| % (me, ", ".join(_milestone_filter))) |
| |
| if len(args) == 2: |
| if verbose: |
| sys.stderr.write("%s: Generating gnuplot data.\n" % me) |
| if update: |
| if verbose: |
| sys.stderr.write("%s: Updating %s from %s.\n" % (me, args[0], DATA_FILE)) |
      if os.system("curl " + DATA_FILE + " > " + args[0]):
        os.system("wget -O " + args[0] + " " + DATA_FILE)
| plot(args[0], args[1]) |
| |
| elif len(args) == 3: |
| if verbose: |
| sys.stderr.write("%s: Generating summary from %s to %s.\n" |
| % (me, args[1], args[2])) |
| if update: |
| if verbose: |
| sys.stderr.write("%s: Updating %s from %s.\n" % (me, args[0], DATA_FILE)) |
      if os.system("curl " + DATA_FILE + " > " + args[0]):
        os.system("wget -O " + args[0] + " " + DATA_FILE)
| |
| try: |
| t_start = parse_time(args[1] + " 00:00:00") |
| except ValueError: |
| sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[1])) |
| sys.exit(1) |
| |
| try: |
| t_end = parse_time(args[2] + " 00:00:00") |
| except ValueError: |
| sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[2])) |
| sys.exit(1) |
| |
| summary(args[0], t_start, t_end) |
| else: |
| usage() |
| |
| sys.exit(0) |
| |
| |
| def summary(datafile, d_start, d_end): |
| "Prints a summary of activity within a specified date range." |
| |
| data = load_data(datafile) |
| |
| # activity during the requested period |
| found, fixed, inval, dup, other = extract(data, 1, d_start, d_end) |
| |
| # activity from the beginning of time to the end of the request |
| # used to compute remaining |
| # XXX It would be faster to change extract to collect this in one |
| # pass. But we don't presently have enough data, nor use this |
| # enough, to justify that rework. |
| fromzerofound, fromzerofixed, fromzeroinval, fromzerodup, fromzeroother \ |
| = extract(data, 1, 0, d_end) |
| |
| alltypes_found = alltypes_fixed = alltypes_inval = alltypes_dup \ |
| = alltypes_other = alltypes_rem = 0 |
| for t in _types: |
| fromzerorem_t = fromzerofound[t]\ |
| - (fromzerofixed[t] + fromzeroinval[t] + fromzerodup[t] |
| + fromzeroother[t]) |
| print('%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \ |
| 'other=%3d remain=%3d' \ |
| % (t, found[t], fixed[t], inval[t], dup[t], other[t], fromzerorem_t)) |
| alltypes_found = alltypes_found + found[t] |
| alltypes_fixed = alltypes_fixed + fixed[t] |
| alltypes_inval = alltypes_inval + inval[t] |
| alltypes_dup = alltypes_dup + dup[t] |
| alltypes_other = alltypes_other + other[t] |
| alltypes_rem = alltypes_rem + fromzerorem_t |
| |
| print('-' * 77) |
| print('%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \ |
| 'other=%3d remain=%3d' \ |
| % ('totals', alltypes_found, alltypes_fixed, alltypes_inval, |
| alltypes_dup, alltypes_other, alltypes_rem)) |
| # print '%12s find/fix ratio: %g%%' \ |
| # % (" "*12, (alltypes_found*100.0/(alltypes_fixed |
| # + alltypes_inval + alltypes_dup + alltypes_other))) |
| |
| |
| def plot(datafile, outbase): |
| "Generates data files intended for use by gnuplot." |
| |
| global _types |
| |
| data = load_data(datafile) |
| |
  t_min = 1 << 32              # later than any plausible 32-bit timestamp
| for issue in data: |
| if issue.created < t_min: |
| t_min = issue.created |
| |
| # break the time up into a tuple, then back up to Sunday |
| t_start = time.localtime(t_min) |
| t_start = time.mktime((t_start[0], t_start[1], t_start[2] - t_start[6] - 1, |
| 0, 0, 0, 0, 0, 0)) |
| |
| plots = { } |
| for t in _types: |
| # for each issue type, we will record per-week stats, compute a moving |
| # average of the find/fix delta, and track the number of open issues |
| plots[t] = [ [ ], MovingAverage(), 0 ] |
| |
| week = 0 |
  for date in range(int(t_start), int(time.time()), ONE_WEEK):
| ### this is quite inefficient, as we could just sort by date, but |
| ### I'm being lazy |
| found, fixed = extract(data, None, date, date + ONE_WEEK - 1) |
| |
| for t in _types: |
| per_week, avg, open_issues = plots[t] |
| delta = found[t] - fixed[t] |
| per_week.append((week, date, |
| found[t], -fixed[t], avg.add(delta), open_issues)) |
| plots[t][2] = open_issues + delta |
| |
| week = week + 1 |
| |
| for t in _types: |
| week_data = plots[t][0] |
| write_file(week_data, outbase, t, 'found', 2) |
| write_file(week_data, outbase, t, 'fixed', 3) |
| write_file(week_data, outbase, t, 'avg', 4) |
| write_file(week_data, outbase, t, 'open', 5) |
| |
def write_file(week_data, base, itype, tag, idx):
  "Write column IDX of WEEK_DATA to the gnuplot data file BASE.TAG.ITYPE."
  f = open('%s.%s.%s' % (base, tag, itype), 'w')
  for info in week_data:
    f.write('%s %s # %s\n' % (info[0], info[idx], time.ctime(info[1])))
  f.close()
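# Each emitted line is "week value # human-readable-date"; gnuplot treats the
# trailing "#..." as a comment, so the files can be fed to it directly, e.g.
# (hypothetical output base "out"):
#
#   plot 'out.found.DEFECT' with lines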
| |
| |
| class MovingAverage: |
| "Helper class to compute moving averages." |
| def __init__(self, n=4): |
| self.n = n |
| self.data = [ 0 ] * n |
| def add(self, value): |
| self.data.pop(0) |
| self.data.append(float(value) / self.n) |
| return self.avg() |
| def avg(self): |
| return reduce(operator.add, self.data) |
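# A minimal illustration of the moving average (hypothetical inputs): with
# the default four-sample window, a constant delta converges on itself:
#
#   avg = MovingAverage()
#   avg.add(4)   # => 1.0
#   avg.add(4)   # => 2.0
#   avg.add(4)   # => 3.0
#   avg.add(4)   # => 4.0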
| |
| |
| def extract(data, details, d_start, d_end): |
| """Extract found/fixed counts for each issue type within the data range. |
| |
| If DETAILS is false, then return two dictionaries: |
| |
| found, fixed |
| |
| ...each mapping issue types to the number of issues of that type |
| found or fixed respectively. |
| |
| If DETAILS is true, return five dictionaries: |
| |
| found, fixed, invalid, duplicate, other |
| |
| The first is still the found issues, but the other four break down |
| the resolution into 'FIXED', 'INVALID', 'DUPLICATE', and a grab-bag |
| category for 'WORKSFORME', 'LATER', 'REMIND', and 'WONTFIX'.""" |
| |
| global _types |
| global _milestone_filter |
| |
| found = { } |
| fixed = { } |
| invalid = { } |
| duplicate = { } |
| other = { } # "WORKSFORME", "LATER", "REMIND", and "WONTFIX" |
| |
| for t in _types: |
| found[t] = fixed[t] = invalid[t] = duplicate[t] = other[t] = 0 |
| |
| for issue in data: |
| # filter out disrespected milestones |
| if issue.milestone in _milestone_filter: |
| continue |
| |
| # record the found/fixed counts |
| if d_start <= issue.created <= d_end: |
| found[issue.type] = found[issue.type] + 1 |
    if issue.resolved is not None and d_start <= issue.resolved <= d_end:
| if details: |
| if issue.resolution == "FIXED": |
| fixed[issue.type] = fixed[issue.type] + 1 |
| elif issue.resolution == "INVALID": |
| invalid[issue.type] = invalid[issue.type] + 1 |
| elif issue.resolution == "DUPLICATE": |
| duplicate[issue.type] = duplicate[issue.type] + 1 |
| else: |
| other[issue.type] = other[issue.type] + 1 |
| else: |
| fixed[issue.type] = fixed[issue.type] + 1 |
| |
| if details: |
| return found, fixed, invalid, duplicate, other |
| else: |
| return found, fixed |
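# For example, summary() above calls extract(data, 1, ...) and unpacks all
# five dictionaries, while plot() passes details=None and unpacks only
# (found, fixed).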
| |
| |
def load_data(datafile):
  "Return a list of Issue objects parsed from the specified data file."
  with open(datafile) as f:
    return list(map(Issue, f.readlines()))
| |
| |
| class Issue: |
| "Represents a single issue from the exported IssueZilla data." |
| |
| def __init__(self, line): |
| row = line.strip().split('\t') |
| |
| self.id = int(row[0]) |
| self.type = row[1] |
| self.reporter = row[2] |
| if row[3] == 'NULL': |
| self.assigned = None |
| else: |
| self.assigned = row[3] |
| self.milestone = row[4] |
| self.created = parse_time(row[5]) |
| self.resolution = row[7] |
| if not self.resolution: |
| # If the resolution is empty, then force the resolved date to None. |
| # When an issue is reopened, there will still be activity showing |
| # a "RESOLVED", thus we get a resolved date. But we simply want to |
| # ignore that date. |
| self.resolved = None |
| else: |
| self.resolved = parse_time(row[6]) |
| self.summary = row[8] |
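# A hypothetical input line, tab-separated, in the column order parsed above
# (id, type, reporter, assigned-to, milestone, created, resolved, resolution,
# summary):
#
#   1001\tDEFECT\tjrandom\tNULL\tBeta\t2002-03-04 12:00:00\tNULL\t\tsome summary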
| |
| |
| parse_time_re = re.compile('([0-9]{4})-([0-9]{2})-([0-9]{2}) ' |
| '([0-9]{2}):([0-9]{2}):([0-9]{2})') |
| |
| def parse_time(t): |
| "Convert an exported MySQL timestamp into seconds since the epoch." |
| |
| global parse_time_re |
| |
  if t == 'NULL':
    return None
  matches = parse_time_re.match(t)
  if matches is None:
    sys.stderr.write('ERROR: bad time value: %s\n' % t)
    sys.exit(1)
  try:
    return time.mktime((int(matches.group(1)),
                        int(matches.group(2)),
                        int(matches.group(3)),
                        int(matches.group(4)),
                        int(matches.group(5)),
                        int(matches.group(6)),
                        0, 0, -1))
  except ValueError:
    sys.stderr.write('ERROR: bad time value: %s\n' % t)
    sys.exit(1)
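# Note that time.mktime() interprets its argument in local time, so these
# timestamps (and the "YYYY-MM-DD 00:00:00" strings built in main()) are all
# measured in the local timezone.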
| |
| def shortusage(): |
| print(pydoc.synopsis(sys.argv[0])) |
| print(""" |
| For simple text summary: |
| find-fix.py [options] query-set-1.tsv YYYY-MM-DD YYYY-MM-DD |
| |
| For gnuplot presentation: |
| find-fix.py [options] query-set-1.tsv outfile |
| """) |
| |
| def usage(): |
| shortusage() |
| for x in long_opts: |
| padding_limit = 18 |
| if x[0][-1:] == '=': |
| sys.stdout.write(" --%s " % x[0][:-1]) |
| padding_limit = 19 |
| else: |
| sys.stdout.write(" --%s " % x[0]) |
| print("%s %s" % ((' ' * (padding_limit - len(x[0]))), x[1])) |
  print('''
Option keywords may be abbreviated to any unique prefix.
Options listed with a trailing "=" take an "=xxx" argument.
Option order is not important.''')
| |
| if __name__ == '__main__': |
| main() |