dev-support/checkcompatibility.py - hbase - Git at Google

 #!/usr/bin/env python
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 # Script which checks Java API compatibility between two revisions of the
 # Java client.
 #
 # Originally sourced from Apache Kudu, which was based on the
 # compatibility checker from the Apache HBase project, but ported to
 # Python for better readability.

 # The script can be invoked as follows:
 #   $ ./checkcompatibility.py ${SOURCE_GIT_REVISION} ${GIT_BRANCH_OR_TAG}
 # or with some options:
 #   $ ./dev-support/checkcompatibility.py \
 #      --annotation org.apache.yetus.audience.InterfaceAudience.Public \
 #      --annotation org.apache.yetus.audience.InterfaceAudience.LimitedPrivate \
 #      --include-file "hbase-*" \
 #      --known_problems_path ~/known_problems.json \
 #      rel/1.3.0 branch-1.4

 import json
 import logging
 import os
 import re
 import shutil
 import subprocess
 import sys
 import urllib2
 from collections import namedtuple
 try:
     import argparse
 except ImportError:
     logging.error(
         "Please install argparse, e.g. via `pip install argparse`.")
     sys.exit(2)

 # Various relative paths
 REPO_DIR = os.getcwd()


 def check_output(*popenargs, **kwargs):
     """ Run command with arguments and return its output as a byte string.
     Backported from Python 2.7 as it's implemented as pure python on stdlib.
     >>> check_output(['/usr/bin/python', '--version'])
     Python 2.6.2 """
     process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
     output, _ = process.communicate()
     retcode = process.poll()
     if retcode:
         cmd = kwargs.get("args")
         if cmd is None:
             cmd = popenargs[0]
         error = subprocess.CalledProcessError(retcode, cmd)
         error.output = output
         raise error
     return output


 def get_repo_dir():
     """ Return the path to the top of the repo. """
     dirname, _ = os.path.split(os.path.abspath(__file__))
     dirname = os.path.dirname(dirname)
     logging.debug("Repo dir is  %s", dirname)
     return dirname


 def get_scratch_dir():
     """ Return the path to the scratch dir that we build within. """
     scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
     if not os.path.exists(scratch_dir):
         os.makedirs(scratch_dir)
     return scratch_dir


 def get_java_acc_dir():
     """ Return the path where we check out the Java API Compliance Checker. """
     return os.path.join(get_repo_dir(), "target", "java-acc")


 def clean_scratch_dir(scratch_dir):
     """ Clean up and re-create the scratch directory. """
     if os.path.exists(scratch_dir):
         logging.info("Removing scratch dir %s ", scratch_dir)
         shutil.rmtree(scratch_dir)
     logging.info("Creating empty scratch dir %s ", scratch_dir)
     os.makedirs(scratch_dir)


 def checkout_java_tree(rev, path):
     """ Check out the Java source tree for the given revision into
     the given path. """
     logging.info("Checking out %s in %s", rev, path)
     os.makedirs(path)
     # Extract java source
     subprocess.check_call(["bash", '-o', 'pipefail', "-c",
                            ("git archive --format=tar %s | "
                             "tar -C '%s' -xf -") % (rev, path)],
                           cwd=get_repo_dir())


 def get_git_hash(revname):
     """ Convert 'revname' to its SHA-1 hash. """
     try:
         return check_output(["git", "rev-parse", revname],
                         cwd=get_repo_dir()).strip()
     except:
         revname = "origin/" + revname
         return check_output(["git", "rev-parse", revname],
                         cwd=get_repo_dir()).strip()


 def get_repo_name(remote_name="origin"):
     """ Get the name of the repo based on the git remote."""
     remote = check_output(["git", "config", "--get", "remote.{0}.url".format(remote_name)],
                            cwd=get_repo_dir()).strip()
     remote = remote.split("/")[-1]
     return remote[:-4] if remote.endswith(".git") else remote


 def build_tree(java_path, verbose):
     """ Run the Java build within 'path'. """
     logging.info("Building in %s ", java_path)
     mvn_cmd = ["mvn", "--batch-mode", "-DskipTests",
                "-Dmaven.javadoc.skip=true", "package"]
     if not verbose:
         mvn_cmd.insert(-1, "--quiet")
     subprocess.check_call(mvn_cmd, cwd=java_path)


 def checkout_java_acc(force):
     """ Check out the Java API Compliance Checker. If 'force' is true, will
     re-download even if the directory exists. """
     acc_dir = get_java_acc_dir()
     if os.path.exists(acc_dir):
         logging.info("Java ACC is already downloaded.")
         if not force:
             return
         logging.info("Forcing re-download.")
         shutil.rmtree(acc_dir)

     logging.info("Downloading Java ACC...")

     url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
     scratch_dir = get_scratch_dir()
     path = os.path.join(scratch_dir, os.path.basename(url))
     jacc = urllib2.urlopen(url)
     with open(path, 'wb') as w:
         w.write(jacc.read())

     subprocess.check_call(["tar", "xzf", path],
                           cwd=scratch_dir)

     shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-2.4"),
                 os.path.join(acc_dir))


 def find_jars(path):
     """ Return a list of jars within 'path' to be checked for compatibility. """
     all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())

     return [j for j in all_jars if (
         "-tests" not in j and
         "-sources" not in j and
         "-with-dependencies" not in j)]


 def write_xml_file(path, version, jars):
     """ Write the XML manifest file for JACC. """
     with open(path, "wt") as f:
         f.write("<version>%s</version>\n" % version)
         f.write("<archives>")
         for j in jars:
             f.write("%s\n" % j)
         f.write("</archives>")


 def ascii_encode_dict(data):
     """ Iterate through a dictionary of data and convert all unicode to ascii.
     This method was taken from
     stackoverflow.com/questions/9590382/forcing-python-json-module-to-work-with-ascii """
     ascii_encode = lambda x: x.encode('ascii') if isinstance(x, unicode) else x
     return dict(map(ascii_encode, pair) for pair in data.items())


 def process_json(path):
     """ Process the known problems json file. The program raises an uncaught exception
     if it can't find the file or if the json is invalid """
     path = os.path.abspath(os.path.expanduser(path))
     try:
         with open(path) as f:
             return json.load(f, object_hook=ascii_encode_dict)
     except ValueError as e:
         logging.error("File: %s\nInvalid JSON:\n%s", str(path), str(e))
         raise
     except IOError as io:
         logging.error("Provided json file path does not exist %s", str(path))
         raise


 def compare_results(tool_results, known_issues, compare_warnings):
     """ Compare the number of problems found with the allowed number. If
     compare_warnings is true then also compare the number of warnings found.

     tool_results = results from the JACC tool - a dictionary
     known_issues = dictionary of expected issue count
     compare_warnings = boolean - if true also compare warnings as well as problems """
     logging.info("Results: %s", str(tool_results))

     unexpected_issue = namedtuple('unexpected_issue', ['check', 'issue_type',
                                                         'known_count', 'observed_count'])
     unexpected_issues = [unexpected_issue(check=check,  issue_type=issue_type,
                                       known_count=known_count,
                                       observed_count=tool_results[check][issue_type])
                      for check, known_issue_counts in known_issues.items()
                         for issue_type, known_count in known_issue_counts.items()
                             if tool_results[check][issue_type] > known_count]

     if not compare_warnings:
         unexpected_issues = [tup for tup in unexpected_issues
                              if tup.issue_type != 'warnings']

     for issue in unexpected_issues:
         logging.error('Found %s during  %s check (known issues: %d, observed issues: %d)',
                 issue.issue_type, issue.check, issue.known_count, issue.observed_count)

     return bool(unexpected_issues)


 def process_java_acc_output(output):
     """ Process the output string to find the problems and warnings in both the
     binary and source compatibility. This is done in a way that is admittedly
     brittle; we are open to better implementations.

     We expect a line containing the relevant information to look something like:
     "total binary compatibility problems: 123, warnings: 16" """
     return_value = {}
     output = output.split("\n")
     for line in output:
         # Line has relevant info
         if line.lower().startswith("total"):
             values = {}
             # Remove "total" keyword
             line = line[6:]
             # Seperate the two valuable parts
             line_list = line.split(",")
             for segment in line_list:
                 part = segment.split(":")
                 # Extract key and value
                 values[part[0][-8:]] = int(part[1])
             return_value[line[:6]] = values
     return return_value

 def log_java_acc_version():
     java_acc_path = os.path.join(
         get_java_acc_dir(), "japi-compliance-checker.pl")

     args = ["perl", java_acc_path, "-dumpversion"]
     logging.info("Java ACC version: " + check_output(args))

 def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations, skip_annotations, name):
     """ Run the compliance checker to compare 'src' and 'dst'. """
     logging.info("Will check compatibility between original jars:\n\t%s\n"
                  "and new jars:\n\t%s",
                  "\n\t".join(src_jars),
                  "\n\t".join(dst_jars))

     java_acc_path = os.path.join(
         get_java_acc_dir(), "japi-compliance-checker.pl")

     src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
     dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
     write_xml_file(src_xml_path, src_name, src_jars)
     write_xml_file(dst_xml_path, dst_name, dst_jars)

     out_path = os.path.join(get_scratch_dir(), "report.html")

     args = ["perl", java_acc_path,
             "-l", name,
             "-d1", src_xml_path,
             "-d2", dst_xml_path,
             "-report-path", out_path]
     if annotations is not None:
         logging.info("Annotations are: %s", annotations)
         annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
         logging.info("Annotations path: %s", annotations_path)
         with file(annotations_path, "w") as f:
             f.write('\n'.join(annotations))
         args.extend(["-annotations-list", annotations_path])

     if skip_annotations is not None:
         skip_annotations_path = os.path.join(
             get_scratch_dir(), "skip_annotations.txt")
         with file(skip_annotations_path, "w") as f:
             f.write('\n'.join(skip_annotations))
         args.extend(["-skip-annotations-list", skip_annotations_path])

     try:
         output = check_output(args)
     except subprocess.CalledProcessError as e:
         # The program returns a nonzero error code if issues are found. We
         # almost always expect some issues and want to process the results.
         output = e.output
     acc_processed = process_java_acc_output(output)
     return acc_processed


 def get_known_problems(json_path, src_rev, dst_rev):
     """ The json file should be in the following format: a dictionary with the
     keys in the format source_branch/destination_branch and the values
     dictionaries with binary and source problems and warnings
     Example:
     {'branch-1.3': {
       'rel/1.3.0': {'binary': {'problems': 123, 'warnings': 16},
                       'source': {'problems': 167, 'warnings': 1}},
       'branch-1.4': {'binary': {'problems': 0, 'warnings': 0},
                       'source': {'problems': 0, 'warnings': 0}}
       },
     'branch-1.4': {
       'rel/1.4.1': {'binary': {'problems': 13, 'warnings': 1},
                       'source': {'problems': 23, 'warnings': 0}}
       }
     } """
     # These are the default values for allowed problems and warnings
     known_problems = {"binary": {"problems": 0, "warnings": 0},
                            "source": {"problems": 0, "warnings": 0}}
     if src_rev.startswith("origin/"):
       src_rev = src_rev[7:]
     if dst_rev.startswith("origin/"):
       dst_rev = dst_rev[7:]
     if json_path is not None:
         known_problems = process_json(json_path)
         try:
             return known_problems[src_rev][dst_rev]
         except KeyError:
             logging.error(("Known Problems values for %s %s are not in "
                             "provided json file. If you are trying to run "
                             "the test with the default values, don't "
                             "provide the --known_problems_path argument")
                             % (src_rev, dst_rev))
             raise
     return known_problems


 def filter_jars(jars, include_filters, exclude_filters):
     """ Filter the list of JARs based on include and exclude filters. """
     filtered = []
     # Apply include filters
     for j in jars:
         basename = os.path.basename(j)
         for f in include_filters:
             if f.match(basename):
                 filtered += [j]
                 break
         else:
             logging.debug("Ignoring JAR %s", j)
     # Apply exclude filters
     exclude_filtered = []
     for j in filtered:
         basename = os.path.basename(j)
         for f in exclude_filters:
             if f.match(basename):
                 logging.debug("Ignoring JAR %s", j)
                 break
         else:
             exclude_filtered += [j]

     return exclude_filtered


 def main():
     """ Main function. """
     logging.basicConfig(level=logging.INFO)
     parser = argparse.ArgumentParser(
         description="Run Java API Compliance Checker.")
     parser.add_argument("-f", "--force-download",
                         action="store_true",
                         help="Download dependencies (i.e. Java JAVA_ACC) "
                         "even if they are already present")
     parser.add_argument("-i", "--include-file",
                         action="append",
                         dest="include_files",
                         help="Regex filter for JAR files to be included. "
                         "Applied before the exclude filters. "
                         "Can be specified multiple times.")
     parser.add_argument("-e", "--exclude-file",
                         action="append",
                         dest="exclude_files",
                         help="Regex filter for JAR files to be excluded. "
                         "Applied after the include filters. "
                         "Can be specified multiple times.")
     parser.add_argument("-a", "--annotation",
                         action="append",
                         dest="annotations",
                         help="Fully-qualified Java annotation. "
                         "Java ACC will only check compatibility of "
                         "annotated classes. Can be specified multiple times.")
     parser.add_argument("--skip-annotation",
                         action="append",
                         dest="skip_annotations",
                         help="Fully-qualified Java annotation. "
                         "Java ACC will not check compatibility of "
                         "these annotated classes. Can be specified multiple "
                         "times.")
     parser.add_argument("-p", "--known_problems_path",
                         default=None, dest="known_problems_path",
                         help="Path to file with json 'known_problems "
                         "dictionary.' Path can be relative or absolute. An "
                         "examples file can be seen in the pydocs for the "
                         "get_known_problems method.")
     parser.add_argument("--skip-clean",
                         action="store_true",
                         help="Skip cleaning the scratch directory.")
     parser.add_argument("--compare-warnings", dest="compare_warnings",
                         action="store_true", default=False,
                         help="Compare warnings as well as problems.")
     parser.add_argument("--skip-build",
                         action="store_true",
                         help="Skip building the projects.")
     parser.add_argument("--verbose",
                         action="store_true",
                         help="more output")
     parser.add_argument("-r", "--remote", default="origin", dest="remote_name",
                         help="Name of remote to use. e.g. its repo name will be used as the name "
                         "we pass to Java ACC for the library.")
     parser.add_argument("src_rev", nargs=1, help="Source revision.")
     parser.add_argument("dst_rev", nargs="?", default="HEAD",
                         help="Destination revision. "
                         "If not specified, will use HEAD.")

     args = parser.parse_args()

     src_rev, dst_rev = args.src_rev[0], args.dst_rev

     logging.info("Source revision: %s", src_rev)
     logging.info("Destination revision: %s", dst_rev)

     # Configure the expected numbers
     known_problems = get_known_problems(
         args.known_problems_path, src_rev, dst_rev)

     # Construct the JAR regex patterns for filtering.
     include_filters = []
     if args.include_files is not None:
         for f in args.include_files:
             logging.info("Applying JAR filename include filter: %s", f)
             include_filters += [re.compile(f)]
     else:
         include_filters = [re.compile(".*")]

     exclude_filters = []
     if args.exclude_files is not None:
         for f in args.exclude_files:
             logging.info("Applying JAR filename exclude filter: %s", f)
             exclude_filters += [re.compile(f)]

     # Construct the annotation list
     if args.annotations is not None:
         logging.info("Filtering classes using %d annotation(s):",
                      len(args.annotations))
         for a in args.annotations:
             logging.info("\t%s", a)

     skip_annotations = args.skip_annotations
     if skip_annotations is not None:
         logging.info("Skipping classes with %d annotation(s):",
                      len(skip_annotations))
         for a in skip_annotations:
             logging.info("\t%s", a)

     # Download deps.
     checkout_java_acc(args.force_download)
     log_java_acc_version()

     # Set up the build.
     scratch_dir = get_scratch_dir()
     src_dir = os.path.join(scratch_dir, "src")
     dst_dir = os.path.join(scratch_dir, "dst")

     if args.skip_clean:
         logging.info("Skipping cleaning the scratch directory")
     else:
         clean_scratch_dir(scratch_dir)
         # Check out the src and dst source trees.
         checkout_java_tree(get_git_hash(src_rev), src_dir)
         checkout_java_tree(get_git_hash(dst_rev), dst_dir)

     # Run the build in each.
     if args.skip_build:
         logging.info("Skipping the build")
     else:
         build_tree(src_dir, args.verbose)
         build_tree(dst_dir, args.verbose)

     # Find the JARs.
     src_jars = find_jars(src_dir)
     dst_jars = find_jars(dst_dir)

     # Filter the JARs.
     src_jars = filter_jars(src_jars, include_filters, exclude_filters)
     dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)

     if not src_jars or not dst_jars:
         logging.error("No JARs found! Are your filters too strong?")
         sys.exit(1)

     output = run_java_acc(src_rev, src_jars, dst_rev,
                             dst_jars, args.annotations, skip_annotations,
                             get_repo_name(args.remote_name))
     sys.exit(compare_results(output, known_problems,
                               args.compare_warnings))


 if __name__ == "__main__":
     main()
	#!/usr/bin/env python
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	# Script which checks Java API compatibility between two revisions of the
	# Java client.
	#
	# Originally sourced from Apache Kudu, which was based on the
	# compatibility checker from the Apache HBase project, but ported to
	# Python for better readability.

	# The script can be invoked as follows:
	# $ ./checkcompatibility.py ${SOURCE_GIT_REVISION} ${GIT_BRANCH_OR_TAG}
	# or with some options:
	# $ ./dev-support/checkcompatibility.py \
	# --annotation org.apache.yetus.audience.InterfaceAudience.Public \
	# --annotation org.apache.yetus.audience.InterfaceAudience.LimitedPrivate \
	# --include-file "hbase-*" \
	# --known_problems_path ~/known_problems.json \
	# rel/1.3.0 branch-1.4

	import json
	import logging
	import os
	import re
	import shutil
	import subprocess
	import sys
	import urllib2
	from collections import namedtuple
	try:
	import argparse
	except ImportError:
	logging.error(
	"Please install argparse, e.g. via `pip install argparse`.")
	sys.exit(2)

	# Various relative paths
	REPO_DIR = os.getcwd()


	def check_output(popenargs, *kwargs):
	""" Run command with arguments and return its output as a byte string.
	Backported from Python 2.7 as it's implemented as pure python on stdlib.
	>>> check_output(['/usr/bin/python', '--version'])
	Python 2.6.2 """
	process = subprocess.Popen(stdout=subprocess.PIPE, popenargs, *kwargs)
	output, _ = process.communicate()
	retcode = process.poll()
	if retcode:
	cmd = kwargs.get("args")
	if cmd is None:
	cmd = popenargs[0]
	error = subprocess.CalledProcessError(retcode, cmd)
	error.output = output
	raise error
	return output


	def get_repo_dir():
	""" Return the path to the top of the repo. """
	dirname, _ = os.path.split(os.path.abspath(__file__))
	dirname = os.path.dirname(dirname)
	logging.debug("Repo dir is %s", dirname)
	return dirname


	def get_scratch_dir():
	""" Return the path to the scratch dir that we build within. """
	scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
	if not os.path.exists(scratch_dir):
	os.makedirs(scratch_dir)
	return scratch_dir


	def get_java_acc_dir():
	""" Return the path where we check out the Java API Compliance Checker. """
	return os.path.join(get_repo_dir(), "target", "java-acc")


	def clean_scratch_dir(scratch_dir):
	""" Clean up and re-create the scratch directory. """
	if os.path.exists(scratch_dir):
	logging.info("Removing scratch dir %s ", scratch_dir)
	shutil.rmtree(scratch_dir)
	logging.info("Creating empty scratch dir %s ", scratch_dir)
	os.makedirs(scratch_dir)


	def checkout_java_tree(rev, path):
	""" Check out the Java source tree for the given revision into
	the given path. """
	logging.info("Checking out %s in %s", rev, path)
	os.makedirs(path)
	# Extract java source
	subprocess.check_call(["bash", '-o', 'pipefail', "-c",
	("git archive --format=tar %s \| "
	"tar -C '%s' -xf -") % (rev, path)],
	cwd=get_repo_dir())


	def get_git_hash(revname):
	""" Convert 'revname' to its SHA-1 hash. """
	try:
	return check_output(["git", "rev-parse", revname],
	cwd=get_repo_dir()).strip()
	except:
	revname = "origin/" + revname
	return check_output(["git", "rev-parse", revname],
	cwd=get_repo_dir()).strip()


	def get_repo_name(remote_name="origin"):
	""" Get the name of the repo based on the git remote."""
	remote = check_output(["git", "config", "--get", "remote.{0}.url".format(remote_name)],
	cwd=get_repo_dir()).strip()
	remote = remote.split("/")[-1]
	return remote[:-4] if remote.endswith(".git") else remote


	def build_tree(java_path, verbose):
	""" Run the Java build within 'path'. """
	logging.info("Building in %s ", java_path)
	mvn_cmd = ["mvn", "--batch-mode", "-DskipTests",
	"-Dmaven.javadoc.skip=true", "package"]
	if not verbose:
	mvn_cmd.insert(-1, "--quiet")
	subprocess.check_call(mvn_cmd, cwd=java_path)


	def checkout_java_acc(force):
	""" Check out the Java API Compliance Checker. If 'force' is true, will
	re-download even if the directory exists. """
	acc_dir = get_java_acc_dir()
	if os.path.exists(acc_dir):
	logging.info("Java ACC is already downloaded.")
	if not force:
	return
	logging.info("Forcing re-download.")
	shutil.rmtree(acc_dir)

	logging.info("Downloading Java ACC...")

	url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
	scratch_dir = get_scratch_dir()
	path = os.path.join(scratch_dir, os.path.basename(url))
	jacc = urllib2.urlopen(url)
	with open(path, 'wb') as w:
	w.write(jacc.read())

	subprocess.check_call(["tar", "xzf", path],
	cwd=scratch_dir)

	shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-2.4"),
	os.path.join(acc_dir))


	def find_jars(path):
	""" Return a list of jars within 'path' to be checked for compatibility. """
	all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())

	return [j for j in all_jars if (
	"-tests" not in j and
	"-sources" not in j and
	"-with-dependencies" not in j)]


	def write_xml_file(path, version, jars):
	""" Write the XML manifest file for JACC. """
	with open(path, "wt") as f:
	f.write("<version>%s</version>\n" % version)
	f.write("<archives>")
	for j in jars:
	f.write("%s\n" % j)
	f.write("</archives>")


	def ascii_encode_dict(data):
	""" Iterate through a dictionary of data and convert all unicode to ascii.
	This method was taken from
	stackoverflow.com/questions/9590382/forcing-python-json-module-to-work-with-ascii """
	ascii_encode = lambda x: x.encode('ascii') if isinstance(x, unicode) else x
	return dict(map(ascii_encode, pair) for pair in data.items())


	def process_json(path):
	""" Process the known problems json file. The program raises an uncaught exception
	if it can't find the file or if the json is invalid """
	path = os.path.abspath(os.path.expanduser(path))
	try:
	with open(path) as f:
	return json.load(f, object_hook=ascii_encode_dict)
	except ValueError as e:
	logging.error("File: %s\nInvalid JSON:\n%s", str(path), str(e))
	raise
	except IOError as io:
	logging.error("Provided json file path does not exist %s", str(path))
	raise


	def compare_results(tool_results, known_issues, compare_warnings):
	""" Compare the number of problems found with the allowed number. If
	compare_warnings is true then also compare the number of warnings found.

	tool_results = results from the JACC tool - a dictionary
	known_issues = dictionary of expected issue count
	compare_warnings = boolean - if true also compare warnings as well as problems """
	logging.info("Results: %s", str(tool_results))

	unexpected_issue = namedtuple('unexpected_issue', ['check', 'issue_type',
	'known_count', 'observed_count'])
	unexpected_issues = [unexpected_issue(check=check, issue_type=issue_type,
	known_count=known_count,
	observed_count=tool_results[check][issue_type])
	for check, known_issue_counts in known_issues.items()
	for issue_type, known_count in known_issue_counts.items()
	if tool_results[check][issue_type] > known_count]

	if not compare_warnings:
	unexpected_issues = [tup for tup in unexpected_issues
	if tup.issue_type != 'warnings']

	for issue in unexpected_issues:
	logging.error('Found %s during %s check (known issues: %d, observed issues: %d)',
	issue.issue_type, issue.check, issue.known_count, issue.observed_count)

	return bool(unexpected_issues)


	def process_java_acc_output(output):
	""" Process the output string to find the problems and warnings in both the
	binary and source compatibility. This is done in a way that is admittedly
	brittle; we are open to better implementations.

	We expect a line containing the relevant information to look something like:
	"total binary compatibility problems: 123, warnings: 16" """
	return_value = {}
	output = output.split("\n")
	for line in output:
	# Line has relevant info
	if line.lower().startswith("total"):
	values = {}
	# Remove "total" keyword
	line = line[6:]
	# Seperate the two valuable parts
	line_list = line.split(",")
	for segment in line_list:
	part = segment.split(":")
	# Extract key and value
	values[part[0][-8:]] = int(part[1])
	return_value[line[:6]] = values
	return return_value

	def log_java_acc_version():
	java_acc_path = os.path.join(
	get_java_acc_dir(), "japi-compliance-checker.pl")

	args = ["perl", java_acc_path, "-dumpversion"]
	logging.info("Java ACC version: " + check_output(args))

	def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations, skip_annotations, name):
	""" Run the compliance checker to compare 'src' and 'dst'. """
	logging.info("Will check compatibility between original jars:\n\t%s\n"
	"and new jars:\n\t%s",
	"\n\t".join(src_jars),
	"\n\t".join(dst_jars))

	java_acc_path = os.path.join(
	get_java_acc_dir(), "japi-compliance-checker.pl")

	src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
	dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
	write_xml_file(src_xml_path, src_name, src_jars)
	write_xml_file(dst_xml_path, dst_name, dst_jars)

	out_path = os.path.join(get_scratch_dir(), "report.html")

	args = ["perl", java_acc_path,
	"-l", name,
	"-d1", src_xml_path,
	"-d2", dst_xml_path,
	"-report-path", out_path]
	if annotations is not None:
	logging.info("Annotations are: %s", annotations)
	annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
	logging.info("Annotations path: %s", annotations_path)
	with file(annotations_path, "w") as f:
	f.write('\n'.join(annotations))
	args.extend(["-annotations-list", annotations_path])

	if skip_annotations is not None:
	skip_annotations_path = os.path.join(
	get_scratch_dir(), "skip_annotations.txt")
	with file(skip_annotations_path, "w") as f:
	f.write('\n'.join(skip_annotations))
	args.extend(["-skip-annotations-list", skip_annotations_path])

	try:
	output = check_output(args)
	except subprocess.CalledProcessError as e:
	# The program returns a nonzero error code if issues are found. We
	# almost always expect some issues and want to process the results.
	output = e.output
	acc_processed = process_java_acc_output(output)
	return acc_processed


	def get_known_problems(json_path, src_rev, dst_rev):
	""" The json file should be in the following format: a dictionary with the
	keys in the format source_branch/destination_branch and the values
	dictionaries with binary and source problems and warnings
	Example:
	{'branch-1.3': {
	'rel/1.3.0': {'binary': {'problems': 123, 'warnings': 16},
	'source': {'problems': 167, 'warnings': 1}},
	'branch-1.4': {'binary': {'problems': 0, 'warnings': 0},
	'source': {'problems': 0, 'warnings': 0}}
	},
	'branch-1.4': {
	'rel/1.4.1': {'binary': {'problems': 13, 'warnings': 1},
	'source': {'problems': 23, 'warnings': 0}}
	}
	} """
	# These are the default values for allowed problems and warnings
	known_problems = {"binary": {"problems": 0, "warnings": 0},
	"source": {"problems": 0, "warnings": 0}}
	if src_rev.startswith("origin/"):
	src_rev = src_rev[7:]
	if dst_rev.startswith("origin/"):
	dst_rev = dst_rev[7:]
	if json_path is not None:
	known_problems = process_json(json_path)
	try:
	return known_problems[src_rev][dst_rev]
	except KeyError:
	logging.error(("Known Problems values for %s %s are not in "
	"provided json file. If you are trying to run "
	"the test with the default values, don't "
	"provide the --known_problems_path argument")
	% (src_rev, dst_rev))
	raise
	return known_problems


	def filter_jars(jars, include_filters, exclude_filters):
	""" Filter the list of JARs based on include and exclude filters. """
	filtered = []
	# Apply include filters
	for j in jars:
	basename = os.path.basename(j)
	for f in include_filters:
	if f.match(basename):
	filtered += [j]
	break
	else:
	logging.debug("Ignoring JAR %s", j)
	# Apply exclude filters
	exclude_filtered = []
	for j in filtered:
	basename = os.path.basename(j)
	for f in exclude_filters:
	if f.match(basename):
	logging.debug("Ignoring JAR %s", j)
	break
	else:
	exclude_filtered += [j]

	return exclude_filtered


	def main():
	""" Main function. """
	logging.basicConfig(level=logging.INFO)
	parser = argparse.ArgumentParser(
	description="Run Java API Compliance Checker.")
	parser.add_argument("-f", "--force-download",
	action="store_true",
	help="Download dependencies (i.e. Java JAVA_ACC) "
	"even if they are already present")
	parser.add_argument("-i", "--include-file",
	action="append",
	dest="include_files",
	help="Regex filter for JAR files to be included. "
	"Applied before the exclude filters. "
	"Can be specified multiple times.")
	parser.add_argument("-e", "--exclude-file",
	action="append",
	dest="exclude_files",
	help="Regex filter for JAR files to be excluded. "
	"Applied after the include filters. "
	"Can be specified multiple times.")
	parser.add_argument("-a", "--annotation",
	action="append",
	dest="annotations",
	help="Fully-qualified Java annotation. "
	"Java ACC will only check compatibility of "
	"annotated classes. Can be specified multiple times.")
	parser.add_argument("--skip-annotation",
	action="append",
	dest="skip_annotations",
	help="Fully-qualified Java annotation. "
	"Java ACC will not check compatibility of "
	"these annotated classes. Can be specified multiple "
	"times.")
	parser.add_argument("-p", "--known_problems_path",
	default=None, dest="known_problems_path",
	help="Path to file with json 'known_problems "
	"dictionary.' Path can be relative or absolute. An "
	"examples file can be seen in the pydocs for the "
	"get_known_problems method.")
	parser.add_argument("--skip-clean",
	action="store_true",
	help="Skip cleaning the scratch directory.")
	parser.add_argument("--compare-warnings", dest="compare_warnings",
	action="store_true", default=False,
	help="Compare warnings as well as problems.")
	parser.add_argument("--skip-build",
	action="store_true",
	help="Skip building the projects.")
	parser.add_argument("--verbose",
	action="store_true",
	help="more output")
	parser.add_argument("-r", "--remote", default="origin", dest="remote_name",
	help="Name of remote to use. e.g. its repo name will be used as the name "
	"we pass to Java ACC for the library.")
	parser.add_argument("src_rev", nargs=1, help="Source revision.")
	parser.add_argument("dst_rev", nargs="?", default="HEAD",
	help="Destination revision. "
	"If not specified, will use HEAD.")

	args = parser.parse_args()

	src_rev, dst_rev = args.src_rev[0], args.dst_rev

	logging.info("Source revision: %s", src_rev)
	logging.info("Destination revision: %s", dst_rev)

	# Configure the expected numbers
	known_problems = get_known_problems(
	args.known_problems_path, src_rev, dst_rev)

	# Construct the JAR regex patterns for filtering.
	include_filters = []
	if args.include_files is not None:
	for f in args.include_files:
	logging.info("Applying JAR filename include filter: %s", f)
	include_filters += [re.compile(f)]
	else:
	include_filters = [re.compile(".*")]

	exclude_filters = []
	if args.exclude_files is not None:
	for f in args.exclude_files:
	logging.info("Applying JAR filename exclude filter: %s", f)
	exclude_filters += [re.compile(f)]

	# Construct the annotation list
	if args.annotations is not None:
	logging.info("Filtering classes using %d annotation(s):",
	len(args.annotations))
	for a in args.annotations:
	logging.info("\t%s", a)

	skip_annotations = args.skip_annotations
	if skip_annotations is not None:
	logging.info("Skipping classes with %d annotation(s):",
	len(skip_annotations))
	for a in skip_annotations:
	logging.info("\t%s", a)

	# Download deps.
	checkout_java_acc(args.force_download)
	log_java_acc_version()

	# Set up the build.
	scratch_dir = get_scratch_dir()
	src_dir = os.path.join(scratch_dir, "src")
	dst_dir = os.path.join(scratch_dir, "dst")

	if args.skip_clean:
	logging.info("Skipping cleaning the scratch directory")
	else:
	clean_scratch_dir(scratch_dir)
	# Check out the src and dst source trees.
	checkout_java_tree(get_git_hash(src_rev), src_dir)
	checkout_java_tree(get_git_hash(dst_rev), dst_dir)

	# Run the build in each.
	if args.skip_build:
	logging.info("Skipping the build")
	else:
	build_tree(src_dir, args.verbose)
	build_tree(dst_dir, args.verbose)

	# Find the JARs.
	src_jars = find_jars(src_dir)
	dst_jars = find_jars(dst_dir)

	# Filter the JARs.
	src_jars = filter_jars(src_jars, include_filters, exclude_filters)
	dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)

	if not src_jars or not dst_jars:
	logging.error("No JARs found! Are your filters too strong?")
	sys.exit(1)

	output = run_java_acc(src_rev, src_jars, dst_rev,
	dst_jars, args.annotations, skip_annotations,
	get_repo_name(args.remote_name))
	sys.exit(compare_results(output, known_problems,
	args.compare_warnings))


	if __name__ == "__main__":
	main()