blob: b296c6dec510d143da7362d69f7335490ca4355a [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Apache RAT is a tool for checking license compliance. This is a script that uses Apache
# RAT to check licenses in Impala.
# It takes as command line parameters two file names - the first is the name of a file
# containing globs of files to ignore, and the second is the XML output of RAT.
# I tested this with
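#   git archive --prefix=foo/ -o <archive>.zip HEAD
#   java -jar apache-rat-0.12.jar -x <archive>.zip >rat.xml
#   <path/to/this/script> bin/rat_exclude_files.txt rat.xml
# (<archive> and the script path are placeholders; substitute your own.)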
# This is copied from a similar file in Apache Kudu. Only RAT 0.12 is supported at this
# time, and the RAT JAR is not included in the Impala repo; it must be downloaded
# separately.
import fnmatch
import re
import sys
import xml.etree.ElementTree as ET
# Matches the first path component of a resource name (for example the
# directory added by "git archive --prefix=foo/"), so that names can be
# compared against the exclude globs relative to the repository root.
_LEADING_COMPONENT_RE = re.compile(r'^[^/]+/')


def load_exclude_globs(path):
    """Return the list of exclude globs read from the file at *path*.

    Every line is stripped of surrounding whitespace.  Lines that begin with
    "# " are treated as comments and skipped.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(path, "r") as glob_file:
        return [line.strip() for line in glob_file if not line.startswith("# ")]


def is_excluded(name, globs):
    """Return True if *name* matches at least one pattern in *globs*."""
    return any(fnmatch.fnmatch(name, pattern) for pattern in globs)


def is_exempt_testdata(name, typename):
    """Return True for archive/binary files under testdata/ that are not JARs.

    Such files are not reported even when RAT has not approved them.
    """
    return (name.startswith('testdata/')
            and typename in ('archive', 'binary')
            and not name.endswith('.jar'))


def find_violations(root, globs):
    """Collect the resources of a RAT report that fail the license check.

    Args:
        root: root element of the parsed RAT XML report; its direct
            'resource' children are inspected.
        globs: exclude patterns (fnmatch syntax) matched against each
            resource name after its first path component is removed.

    Returns:
        A list of (label, name) tuples in report order.  The label is
        'UNAPPROVED' when the resource carries a license-approval element
        that is not 'true', and 'NO APPROVALS; <type>' when it carries none.
    """
    violations = []
    for resource in root.findall('resource'):
        approvals = resource.findall('license-approval')
        # Explicitly approved resources need no further inspection.
        if approvals and approvals[0].attrib['name'] == 'true':
            continue

        clean_name = _LEADING_COMPONENT_RE.sub('', resource.attrib['name'])
        if is_excluded(clean_name, globs):
            continue

        type_element = resource.find('type')
        # A resource without a <type> element is still reported, rather than
        # aborting the whole check with an IndexError.
        typename = 'unknown' if type_element is None else type_element.attrib['name']
        if is_exempt_testdata(clean_name, typename):
            continue

        label = 'UNAPPROVED' if approvals else "NO APPROVALS; " + typename
        violations.append((label, clean_name))
    return violations


def main(argv):
    """Run the check for the command line *argv* and return the exit status.

    Expects argv to be [program, exclude_globs_file, rat_report_xml].
    Returns 1 on a usage error or when any violation is found (each one is
    written to stderr), and 0 after printing 'OK' when the report is clean.
    """
    if len(argv) != 3:
        sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" % (argv[0],))
        return 1

    globs = load_exclude_globs(argv[1])
    root = ET.parse(argv[2]).getroot()

    violations = find_violations(root, globs)
    for label, name in violations:
        sys.stderr.write("%s: %s\n" % (label, name))
    if violations:
        return 1

    print('OK')
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))