blob: 917d2343ca0db6f8429439c9b2d3d2d772c97a18 [file] [log] [blame]
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
## See the usage() function for operating instructions. ##
import re
try:
# Python >=2.6
from functools import reduce
except ImportError:
# Python <2.6
pass
import sys
import operator
# One line of trail debug output, in the form
#   (txn_body, filename, lineno, txn): ops
# where txn is the literal 0 or 1 and ops is the remainder of the line.
# Raw strings keep the regex backslashes from being read as (invalid)
# string escape sequences.
_re_trail = re.compile(r'\((?P<txn_body>[a-z_]*), (?P<filename>[a-z_\-./]*), (?P<lineno>[0-9]*), (?P<txn>0|1)\): (?P<ops>.*)')

# A single "(word, word)" pair inside the ops portion of a trail line.
_re_table_op = re.compile(r'\(([a-z]*), ([a-z]*)\)')

# Horizontal rule written around each report section heading.
_separator = '------------------------------------------------------------\n'
def parse_trails_log(infile):
    """
    Parse trail debug output read from INFILE.

    Every line must match _re_trail; on the first line that does not,
    a message is written to stderr and the program exits with status 1.
    Lines whose txn field is 0 are skipped.

    Returns a list of (txn_body, trail) pairs, where txn_body is the
    tuple (txn_body name, filename, line number as int) and trail is
    the list of 2-tuples found by _re_table_op in the ops field,
    stored in reverse order of appearance.
    """
    parsed = []
    for index, text in enumerate(infile.readlines()):
        # Input line numbers are reported 1-based.
        lineno = index + 1
        match = _re_trail.match(text)
        if match is None:
            sys.stderr.write('Invalid input, line %u:\n%s\n' % (lineno, text))
            sys.exit(1)

        ### We're not interested in trails that don't use txns at this point.
        if int(match.group('txn')) == 0:
            continue

        txn_body = (match.group('txn_body'),
                    match.group('filename'),
                    int(match.group('lineno')))

        # findall() yields the pairs in logged order; keep them reversed.
        trail = _re_table_op.findall(match.group('ops'))[::-1]
        if len(trail) == 0:
            sys.stderr.write('Warning! Empty trail at line %u:\n%s' % (lineno, text))

        parsed.append((txn_body, trail))

    return parsed
def output_summary(trails, outfile):
    """
    Write the 'Summary' section to OUTFILE.

    TRAILS is a sequence of (txn_body, trail) pairs; only the length of
    each trail is used.  Reports the number of trails, the total number
    of ops, and the maximum, median and average ops per trail.  The
    median is the element at index n // 2 of the sorted lengths (the
    upper median for an even n).

    When TRAILS is empty, all statistics are reported as zero.
    """
    ops = sorted(len(trail) for (txn_body, trail) in trails)

    total_trails = len(ops)
    total_ops = sum(ops)
    if total_trails:
        max_ops = ops[-1]
        # Floor division: a plain '/' yields a float on Python 3, which
        # is not a valid list index.
        median_ops = ops[total_trails // 2]
        average_ops = float(total_ops) / total_trails
    else:
        # Nothing to index or divide by.
        max_ops = 0
        median_ops = 0
        average_ops = 0.0

    outfile.write(_separator)
    outfile.write('Summary\n')
    outfile.write(_separator)
    outfile.write('Total number of trails: %10i\n' % total_trails)
    outfile.write('Total number of ops: %10i\n' % total_ops)
    outfile.write('max ops/trail: %10i\n' % max_ops)
    outfile.write('median ops/trail: %10i\n' % median_ops)
    outfile.write('average ops/trail: %10.2f\n' % average_ops)
    outfile.write('\n')
# custom compare function
def _freqtable_cmp(a_b, c_d):
    """
    Compare two (item, frequency) pairs, cmp()-style.

    Pairs with a higher frequency order first; pairs with equal
    frequency are ordered by ascending item.  Returns a negative
    number, zero, or a positive number.
    """
    (a, b) = a_b
    (c, d) = c_d
    # (x > y) - (x < y) is the portable spelling of cmp(x, y), which
    # Python 3 no longer provides.  The result gets its own name so it
    # cannot clobber the item 'c' needed by the tie-breaker below.
    result = (d > b) - (d < b)
    if not result:
        result = (a > c) - (a < c)
    return result
def list_frequencies(list):
    """
    Given a list, return a list composed of (item, frequency)
    in sorted order

    The result is ordered by descending frequency; items with the same
    frequency are ordered by ascending item.  Items must be hashable
    and mutually comparable.  An empty input yields an empty list.
    """
    counter = {}
    for item in list:
        counter[item] = counter.get(item, 0) + 1

    # The parameter name shadows the builtin list() (the name is kept so
    # existing callers are unaffected), so the builtin cannot be called
    # here; sorted() builds the result list directly.
    return sorted(counter.items(), key=lambda pair: (-pair[1], pair[0]))
def output_trail_length_frequencies(trails, outfile):
    """
    Write the 'Trail length frequencies' section to OUTFILE.

    TRAILS is a sequence of (txn_body, trail) pairs.  For every distinct
    trail length, one row is written with the length, how many trails
    have that length, and that count as a percentage of all trails.
    Rows appear in the order returned by list_frequencies().
    """
    lengths = [len(trail) for (txn_body, trail) in trails]
    trail_count = len(lengths)
    frequencies = list_frequencies(lengths)

    for heading in (_separator, 'Trail length frequencies\n', _separator,
                    'ops/trail frequency percentage\n'):
        outfile.write(heading)

    for (length, count) in frequencies:
        percentage = float(count) * 100 / trail_count
        outfile.write('%4i %6i %5.2f\n' % (length, count, percentage))

    outfile.write('\n')
def output_trail(outfile, trail, column = 0):
    """
    Write TRAIL to OUTFILE as a comma-separated list of its elements.

    An empty trail is written as '<empty>'.  Otherwise the str() of each
    element is appended to the current output line; when adding the next
    element would make the line longer than 75 - COLUMN characters, the
    line is flushed with a trailing comma and the following line is
    indented by COLUMN spaces.  A blank line follows the trail.
    """
    ### Output the trail itself, in its own column
    if not trail:
        outfile.write('<empty>\n')
        return

    pending = str(trail[0])
    for op in trail[1:]:
        text = str(op)
        if len(pending) + len(text) <= 75 - column:
            # Still fits: extend the current line.
            pending = pending + ', ' + text
            continue
        # Too long: flush what we have and start an indented new line.
        outfile.write(pending + ',\n')
        outfile.write(' ' * column)
        pending = text

    outfile.write(pending + '\n')
    outfile.write('\n')
def output_trail_frequencies(trails, outfile):
    """
    Write the 'Trail frequencies' section to OUTFILE.

    TRAILS is a sequence of (txn_body, trail) pairs, where txn_body is a
    3-tuple (name, filename, line number).  Identical (txn_body, trail)
    combinations are counted together; for each distinct combination a
    header line naming the txn_body is written, followed by its count,
    its percentage of all trails, the trail length, and the trail itself
    (laid out by output_trail() in column 37).
    """
    trail_count = len(trails)

    # Trails are lists; convert them to tuples so each pair is hashable
    # and can be counted by list_frequencies().
    hashable = [(txn_body, tuple(trail)) for (txn_body, trail) in trails]
    frequencies = list_frequencies(hashable)

    for heading in (_separator, 'Trail frequencies\n', _separator,
                    'frequency percentage ops/trail trail\n'):
        outfile.write(heading)

    for (key, count) in frequencies:
        ((body_name, filename, lineno), ops) = key
        percentage = float(count) * 100 / trail_count
        outfile.write('-- %s - %s:%u --\n' % (body_name, filename, lineno))
        outfile.write('%6i %5.2f %4i ' % (count, percentage, len(ops)))
        output_trail(outfile, ops, 37)
def output_txn_body_frequencies(trails, outfile):
    """
    Write the 'txn_body frequencies' section to OUTFILE.

    TRAILS is a sequence of (txn_body, trail) pairs, where txn_body is a
    3-tuple (name, filename, line number).  For each distinct txn_body,
    one row is written with the number of trails that used it, that
    count as a percentage of all trails, and the txn_body's name and
    location.  Rows appear in the order returned by list_frequencies().
    """
    bodies = [txn_body for (txn_body, trail) in trails]
    trail_count = len(trails)
    frequencies = list_frequencies(bodies)

    for heading in (_separator, 'txn_body frequencies\n', _separator,
                    'frequency percentage txn_body\n'):
        outfile.write(heading)

    for (body, count) in frequencies:
        (body_name, filename, lineno) = body
        percentage = float(count) * 100 / trail_count
        row = (count, percentage, body_name, filename, lineno)
        outfile.write('%6i %5.2f %s - %s:%u\n' % row)
def usage(pgm):
    """
    Write operating instructions for this program to stderr.

    PGM is the program name to show in the message.
    """
    w = sys.stderr.write
    w("%s: a program for analyzing Subversion trail usage statistics.\n" % pgm)
    w("\n")
    w("Usage:\n")
    w("\n")
    w(" Compile Subversion with -DSVN_FS__TRAIL_DEBUG, which will cause it\n")
    # The next line used to begin with a second "it", so the message
    # read "cause it it to print".
    w(" to print trail statistics to stderr. Save the stats to a file,\n")
    w(" invoke %s on the file, and ponder the output.\n" % pgm)
    w("\n")
if __name__ == '__main__':
    # Script entry point: read the trail log named by the single
    # optional argument (or stdin when no argument is given) and write
    # all report sections to stdout.
    if len(sys.argv) > 2:
        sys.stderr.write("Error: too many arguments\n\n")
        usage(sys.argv[0])
        sys.exit(1)

    if len(sys.argv) == 1:
        infile = sys.stdin
    else:
        try:
            infile = open(sys.argv[1])
        except IOError:
            sys.stderr.write("Error: unable to open '%s'\n\n" % sys.argv[1])
            usage(sys.argv[0])
            sys.exit(1)

    try:
        trails = parse_trails_log(infile)
    finally:
        # Release the log file we opened, even when parsing bails out
        # through sys.exit(); stdin is not ours to close.
        if infile is not sys.stdin:
            infile.close()

    output_summary(trails, sys.stdout)
    output_trail_length_frequencies(trails, sys.stdout)
    output_trail_frequencies(trails, sys.stdout)
    output_txn_body_frequencies(trails, sys.stdout)