blob: 04190c1dea3bdfab34fb27fe6b49e56a0a62d85b [file] [log] [blame]
#!/usr/bin/env python
# ====================================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ====================================================================
"""\
Usage: 1. {PROGRAM} [OPTIONS] include INCLUDE-PATH ...
2. {PROGRAM} [OPTIONS] exclude EXCLUDE-PATH ...
Read a Subversion revision log output stream from stdin, analyzing its
revision log history to see what paths would need to be additionally
provided as part of the list of included/excluded paths if trying to
use Subversion's 'svndumpfilter' program to include/exclude paths from
a full dump of a repository's history.
The revision log stream should be the result of 'svn log -v' or 'svn
log -vq' when run against the root of the repository whose history
will be filtered by a user with universal read access to the
repository's data. Do not use the --use-merge-history (-g) or
--stop-on-copy when generating this revision log stream.
Use the default ordering of revisions (that is, '-r HEAD:0').
Return errorcode 0 if there are no additional dependencies found, 1 if
there were; any other errorcode indicates a fatal error.
Paths in mergeinfo are not considered as additional dependencies so the
--skip-missing-merge-sources option of 'svndumpfilter' may be required
for successful filtering with the resulting path list.
Options:
--help (-h) Show this usage message and exit.
--targets FILE Read INCLUDE-PATHs and EXCLUDE-PATHs from FILE,
one path per line.
--verbose (-v) Provide more information. May be used multiple
times for additional levels of information (-vv).
"""
import sys
import os
import getopt
import string
# Current verbosity level; main() raises it by one for every
# --verbose (-v) option given on the command line.
verbosity = 0


class LogStreamError(Exception):
    """Raised when the revision log stream is malformed or unusable."""


class EOFError(Exception):
    """Raised by readline() when the log stream ends unexpectedly.

    NOTE: within this module, this class shadows the builtin EOFError.
    """


# Process exit codes: no additional dependencies found, additional
# dependencies found, and fatal error, respectively.
EXIT_SUCCESS = 0
EXIT_MOREDEPS = 1
EXIT_FAILURE = 2
def sanitize_path(path):
    """Return PATH with all empty '/'-separated components removed.

    Leading, trailing and repeated slashes therefore disappear, so
    '/trunk//foo/' becomes 'trunk/foo' and '/' becomes ''.
    """
    components = [piece for piece in path.split('/') if piece]
    return '/'.join(components)
def subsumes(path, maybe_child):
    """Return True if MAYBE_CHILD is PATH itself or is located below PATH.

    The test is component-aware: 'a' subsumes 'a/b' but not 'ab'.
    """
    return path == maybe_child or maybe_child.startswith(path + '/')
def compare_paths(path1, path2):
    """Three-way compare two '/'-separated paths for depth-first ordering.

    Returns 0 if the paths are equal, a negative number if PATH1 sorts
    before PATH2 and a positive number otherwise.  A child always sorts
    after its parent but before any greater sibling of that parent, i.e.
    the '/' separator sorts lower than every other character.

    The function follows the old cmp()-style protocol; wrap it with
    functools.cmp_to_key() to use it as a sort key.
    """
    # Are the paths exactly the same?
    if path1 == path2:
        return 0

    # Skip past common prefix
    path1_len = len(path1)
    path2_len = len(path2)
    min_len = min(path1_len, path2_len)
    i = 0
    while i < min_len and path1[i] == path2[i]:
        i += 1

    # First differing character of each path; NUL stands in for "this
    # path ended here" and so sorts below any real character.
    char1 = path1[i] if i < path1_len else '\0'
    char2 = path2[i] if i < path2_len else '\0'

    # Children of paths are greater than their parents, but less than
    # greater siblings of their parents
    if char1 == '/' and i == path2_len:
        return 1
    if char2 == '/' and i == path1_len:
        return -1
    if char1 == '/':
        return -1
    if char2 == '/':
        return 1

    # Common prefix was skipped above, next character is compared to
    # determine order.  The builtin cmp() does not exist on Python 3, so
    # spell the three-way comparison out; the result is still -1, 0 or 1.
    return (char1 > char2) - (char1 < char2)
def log(msg, min_verbosity):
    """Write MSG plus a newline to stderr if verbosity allows it.

    Nothing is written unless the module-level 'verbosity' is at least
    MIN_VERBOSITY.  Messages at level 1 and level 2 are prefixed with
    "[* ] " and "[**] " respectively; other levels get no prefix.
    """
    if verbosity < min_verbosity:
        return
    prefix = {1: "[* ] ", 2: "[**] "}.get(min_verbosity)
    if prefix is not None:
        sys.stderr.write(prefix)
    sys.stderr.write(msg + "\n")
class DependencyTracker:
    """Track the requested include paths and the copy sources they need.

    include_paths holds the paths given by the caller; dependent_paths
    collects copy-source paths discovered by include_missing_copies().
    """

    def __init__(self, include_paths):
        """Start from INCLUDE_PATHS with no dependencies discovered yet."""
        self.dependent_paths = set()
        self.include_paths = set(include_paths)

    def path_included(self, path):
        """Return True if PATH is covered by an include or dependent path."""
        known_paths = self.include_paths | self.dependent_paths
        return any(subsumes(known, path) for known in known_paths)

    def include_missing_copies(self, path_copies):
        """Record the copy sources required by already-included paths.

        PATH_COPIES is a collection of (path, copyfrom_path) pairs.  Each
        pair whose destination path is currently included gets its source
        added to dependent_paths and is removed from PATH_COPIES in place.
        Because a newly added source can cause further copies to become
        included, the scan repeats until a pass adds nothing.
        """
        while True:
            known_count = len(self.include_paths) + len(self.dependent_paths)
            log("Cross-checking %d included paths with %d copies "
                "for missing path dependencies..."
                % (known_count, len(path_copies)), 1)

            resolved = []
            for dst_path, src_path in path_copies:
                if not self.path_included(dst_path):
                    continue
                log("Adding copy '%s' -> '%s'" % (src_path, dst_path), 1)
                self.dependent_paths.add(src_path)
                resolved.append((dst_path, src_path))

            if len(resolved) == 0:
                log("Found all missing path dependencies", 1)
                return

            # Drop the handled copies only after the scan, so PATH_COPIES
            # is never modified while it is being iterated.
            for copy in resolved:
                path_copies.remove(copy)
            log("Found %d new copy dependencies, need to re-check for more"
                % len(resolved), 1)
def readline(stream):
    """Return the next line of STREAM without its trailing newline.

    The line is also logged at verbosity level 2.  Raises EOFError if
    the stream has no more data.
    """
    raw = stream.readline()
    if raw:
        text = raw.rstrip('\n\r')
        log(text, 2)
        return text
    raise EOFError("Unexpected end of stream")
def svn_log_stream_get_dependencies(stream, included_paths):
    """Parse 'svn log -v' output from STREAM and compute copy dependencies.

    STREAM is read revision by revision.  Every changed-path entry that
    records a copy ("(from /SRC:REV)") is collected, and once the whole
    stream has been consumed the copies are cross-checked against
    INCLUDED_PATHS.

    Returns a DependencyTracker whose dependent_paths attribute holds the
    copy-source paths required by the included paths.

    Raises LogStreamError if a log divider or header line is missing, if
    revisions do not appear youngest-first, or if no changed path was seen
    at all.  EOFError (from readline()) propagates if the stream ends in
    the middle of a revision.
    """
    import re

    dt = DependencyTracker(included_paths)

    header_re = re.compile(r'^r([0-9]+) \|.*$')
    # 'svn log -v' indents every changed-path entry with spaces (three in
    # stock output), so accept any non-empty run of leading spaces rather
    # than exactly one.
    action_re = re.compile(r'^ +[ADMR] /(.*)$')
    copy_action_re = re.compile(r'^ +[AR] /(.*) \(from /(.*):[0-9]+\)$')
    divider = '-' * 72

    line_buf = None
    last_revision = 0
    eof = False
    path_copies = set()
    found_changed_path = False

    while not eof:
        # Re-use the line left over from the previous revision, if any;
        # otherwise fetch a fresh one.  Running out of input here simply
        # means there are no more revisions.
        try:
            if line_buf:
                line = line_buf
            else:
                line = readline(stream)
        except EOFError:
            break

        # We should be sitting at a log divider line.
        if line != divider:
            raise LogStreamError("Expected log divider line; not found.")

        # Next up is a log header line.  (The final divider of the stream
        # is followed by nothing, which ends the loop.)
        try:
            line = readline(stream)
        except EOFError:
            break
        match = header_re.search(line)
        if not match:
            raise LogStreamError("Expected log header line; not found.")
        revision = int(match.group(1))
        if last_revision and revision >= last_revision:
            raise LogStreamError("Revisions are misordered.  Make sure log stream "
                                 "is from 'svn log' with the youngest revisions "
                                 "before the oldest ones (the default ordering).")
        log("Parsing revision %d" % (revision), 1)
        last_revision = revision

        # The last header field reads "N line(s)" unless the log was
        # generated in quiet mode, in which case there is no message.
        last_field = line.split('|')[-1].strip()
        idx = last_field.find(' line')
        if idx != -1:
            log_lines = int(last_field[:idx])
        else:
            log_lines = 0

        # Now see if there are any changed paths.  If so, parse and process them.
        line = readline(stream)
        if line == 'Changed paths:':
            while True:
                try:
                    line = readline(stream)
                except EOFError:
                    eof = True
                    break
                match = copy_action_re.search(line)
                if match:
                    found_changed_path = True
                    path_copies.add((sanitize_path(match.group(1)),
                                     sanitize_path(match.group(2))))
                elif action_re.search(line):
                    found_changed_path = True
                else:
                    # First line that is not a changed-path entry.
                    break

        # Finally, skip any log message lines.  (If there are none,
        # remember the last line we read, because it probably has
        # something important in it.)
        if log_lines:
            for _ in range(log_lines):
                readline(stream)
            line_buf = None
        else:
            line_buf = line

    if not found_changed_path:
        raise LogStreamError("No changed paths found; did you remember to run "
                             "'svn log' with the --verbose (-v) option when "
                             "generating the input to this script?")

    dt.include_missing_copies(path_copies)
    return dt
def analyze_logs(included_paths):
    """Analyze the log stream on stdin and report extra paths to include.

    INCLUDED_PATHS is an iterable of sanitized repository paths (without a
    leading slash).  The initial paths, any dependent paths found, and the
    parent directories that may have to be created by hand are printed to
    stdout.

    Returns EXIT_MOREDEPS if additional dependencies were found, otherwise
    EXIT_SUCCESS.  Errors raised while parsing the log stream propagate to
    the caller.
    """
    # cmp_to_key is only needed here, so import it locally.
    import functools

    # The paths are walked twice (for printing and for the tracker), so
    # materialize them in case the caller handed us a one-shot iterator.
    included_paths = list(included_paths)

    print("Initial include paths:")
    for path in included_paths:
        print(" + /%s" % (path))

    dt = svn_log_stream_get_dependencies(sys.stdin, included_paths)

    found_new_deps = bool(dt.dependent_paths)
    if found_new_deps:
        print("Dependent include paths found:")
        for path in dt.dependent_paths:
            print(" + /%s" % (path))
        print("You need to also include them (or one of their parents).")
    else:
        print("No new dependencies found!")

    # Collect every ancestor directory of the included paths.
    parents = set()
    for path in dt.include_paths:
        parent = os.path.dirname(path)
        # dirname() of a rooted path eventually returns its own argument;
        # stop there instead of looping forever.
        while parent and parent != path:
            parents.add(parent)
            path, parent = parent, os.path.dirname(parent)

    if parents:
        print("You might still need to manually create parent directories "
              "for the included paths before loading a filtered dump:")
        # compare_paths is a cmp-style function; adapt it to a sort key so
        # this works on Python 3, where sort() no longer accepts one.
        for parent in sorted(parents, key=functools.cmp_to_key(compare_paths)):
            print(" /%s" % (parent))

    return EXIT_MOREDEPS if found_new_deps else EXIT_SUCCESS
def usage_and_exit(errmsg=None):
    """Print the module usage text and terminate the process.

    With a non-empty ERRMSG the usage text and an "ERROR:" line go to
    stderr and the exit status is EXIT_FAILURE; otherwise the usage text
    goes to stdout and the exit status is EXIT_SUCCESS.
    """
    program = os.path.basename(sys.argv[0])
    if errmsg:
        stream, status = sys.stderr, EXIT_FAILURE
    else:
        stream, status = sys.stdout, EXIT_SUCCESS
    stream.write(__doc__.replace("{PROGRAM}", program))
    if errmsg:
        stream.write("\nERROR: %s\n" % (errmsg))
    sys.exit(status)
def main():
    """Command-line entry point: parse options and run the subcommand.

    Exits with EXIT_SUCCESS or EXIT_MOREDEPS according to the analysis
    result, and with EXIT_FAILURE on any usage or runtime error.
    """
    global verbosity

    targets_file = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hv",
                                   ["help", "verbose", "targets="])
    except getopt.GetoptError as e:
        usage_and_exit(str(e))

    for option, value in opts:
        if option in ('-h', '--help'):
            usage_and_exit()
        elif option in ('-v', '--verbose'):
            verbosity = verbosity + 1
        elif option == '--targets':
            targets_file = value

    if not args:
        usage_and_exit("Not enough arguments")

    if targets_file is None:
        targets = args[1:]
    else:
        # Build a real list (not a lazy map object) so that the emptiness
        # check below works, and close the file as soon as it is read.
        # An unreadable file must end in EXIT_FAILURE: the interpreter's
        # default status for an uncaught exception is 1, which would be
        # mistaken for EXIT_MOREDEPS.
        try:
            with open(targets_file, 'r') as targets_fp:
                targets = [line.rstrip('\n\r') for line in targets_fp]
        except (IOError, OSError) as e:
            log("ERROR: " + str(e), 0)
            sys.exit(EXIT_FAILURE)
    if not targets:
        usage_and_exit("No target paths specified")

    try:
        if args[0] == 'include':
            sys.exit(analyze_logs([sanitize_path(target)
                                   for target in targets]))
        elif args[0] == 'exclude':
            usage_and_exit("Feature not implemented")
        else:
            usage_and_exit("Valid subcommands are 'include' and 'exclude'")
    except SystemExit:
        raise
    except (LogStreamError, EOFError) as e:
        log("ERROR: " + str(e), 0)
        sys.exit(EXIT_FAILURE)
    except:
        # Deliberately catch everything: any unexpected failure has to map
        # to EXIT_FAILURE, because exit status 1 is reserved for
        # EXIT_MOREDEPS.
        import traceback
        traceback.print_exc()
        sys.exit(EXIT_FAILURE)


if __name__ == "__main__":
    main()