blob: 12c6e21693987c1bcf43f67b62d8ef56367d8256 [file] [log] [blame]
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from contextlib import contextmanager
from random import randint
from pathlib import Path
import os
import subprocess
import shlex
import filecmp
import shutil
import re
import itertools
import argparse
project_root = Path(__file__).resolve().parent.parent
update_dependency_licenses_cmd = ('mvn license:aggregate-add-third-party@generate-and-check-licenses' +
' -Dlicense.skipAggregateAddThirdParty=false -B')
@contextmanager
def cd(newdir):
prevdir = Path.cwd()
os.chdir(newdir.expanduser())
try:
yield
finally:
os.chdir(prevdir)
def generate_dependency_licenses():
"""Generates DEPENDENCY-LICENSES in target. The committed DEPENDENCY-LICENSES is not modified."""
print('Generating DEPENDENCY-LICENSES')
update_dependency_licenses_output_to_target_cmd = (update_dependency_licenses_cmd +
' -Dlicense.thirdPartyFilename=DEPENDENCY-LICENSES' +
' -Dlicense.outputDirectory=target')
subprocess.check_call(shlex.split(
update_dependency_licenses_output_to_target_cmd))
print('Done generating DEPENDENCY-LICENSES')
def check_dependency_licenses():
"""Compares the regenerated DEPENDENCY-LICENSES in target with the DEPENDENCY-LICENSES in the root, and verifies that they are identical"""
print('Checking DEPENDENCY-LICENSES')
if (not filecmp.cmp(Path('DEPENDENCY-LICENSES'), Path('target') / 'DEPENDENCY-LICENSES', shallow=False)):
print(
f"DEPENDENCY-LICENSES and target/DEPENDENCY-LICENSES are different. Please update DEPENDENCY-LICENSES by running '{update_dependency_licenses_cmd}' in the project root")
return False
return True
def build_storm():
print("Building Storm")
subprocess.check_call(shlex.split(
'mvn clean install -B -DskipTests -Dcheckstyle.skip -Dpmd.skip'
))
print("Done building Storm")
def extract_license_report_maven_coordinates(lines):
# Lines like " * Checker Qual (org.checkerframework:checker-qual:2.5.2 - https://checkerframework.org)"
matches = map(lambda line: re.match(
r'\s+\*.*\((?P<gav>.*) \- .*\).*', line), lines)
return set(map(lambda match: match.group('gav'), filter(lambda match: match != None, matches)))
def parse_license_binary_dependencies_coordinate_set():
"""Gets the dependencies listed in LICENSE-binary"""
license_binary_begin_binary_section = '----------------------------END OF SOURCE NOTICES -------------------------------------------'
license_binary_lines = read_lines(project_root / 'LICENSE-binary')
return extract_license_report_maven_coordinates(
itertools.dropwhile(lambda line: license_binary_begin_binary_section not in line, license_binary_lines))
def extract_dependency_list_maven_coordinates(lines):
# Lines like " com.google.code.findbugs:jsr305:jar:3.0.2 -- module jsr305 (auto)"
matches = map(lambda line: re.match(
r'\s+(?P<group>\S*)\:(?P<artifact>\S*)\:(?P<type>\S*)\:(?P<version>\S*)', line), lines)
return set(map(lambda match: match.group('group') + ':' + match.group('artifact') + ':' + match.group('version'), filter(lambda match: match != None, matches)))
def read_lines(path):
with open(path) as f:
return f.readlines()
def generate_storm_dist_dependencies_coordinate_set():
"""Gets the dependencies for storm-dist/binary, plus the dependencies of storm-shaded-deps"""
generated_coordinate_set = extract_license_report_maven_coordinates(read_lines(
project_root / 'storm-dist' / 'binary' / 'target' / 'generated-sources' / 'license' / 'THIRD-PARTY.txt'))
# Add dependencies from storm-shaded-deps
with cd(project_root / 'storm-shaded-deps'):
print("Generating dependency list for storm-shaded-deps")
subprocess.check_call(shlex.split(
'mvn dependency:list -DoutputFile=target/deps-list -Dmdep.outputScope=false -DincludeScope=compile -B'))
print("Done generating dependency list for storm-shaded-deps")
shaded_dep_coordinates = extract_dependency_list_maven_coordinates(
read_lines(project_root / 'storm-shaded-deps' / 'target' / 'deps-list'))
shaded_dep_coordinates = set(filter(lambda coordinate: 'org.apache.storm:' not in coordinate, shaded_dep_coordinates))
print('The storm-shaded-deps dependencies that are included when distributing storm-dist/binary are ' + str(shaded_dep_coordinates))
print('')
generated_coordinate_set.update(shaded_dep_coordinates)
return generated_coordinate_set
def generate_storm_dist_license_report():
with cd(project_root / 'storm-dist' / 'binary'):
print('')
print('Generating storm-dist license report')
subprocess.check_call(shlex.split(update_dependency_licenses_cmd))
print('Done generating storm-dist license report')
def make_license_binary_checker():
"""Checks that the dependencies in the storm-dist/binary license report are mentioned in LICENSE-binary, and vice versa."""
print('Checking LICENSE-binary')
license_binary_coordinate_set = parse_license_binary_dependencies_coordinate_set()
generated_coordinate_set = generate_storm_dist_dependencies_coordinate_set()
superfluous_coordinates_in_license = license_binary_coordinate_set.difference(
generated_coordinate_set)
coordinates_missing_in_license = generated_coordinate_set.difference(
license_binary_coordinate_set)
print('Done checking LICENSE-binary')
def check_for_errors():
if superfluous_coordinates_in_license:
print('Dependencies in LICENSE-binary that appear unused: ')
for coord in sorted(superfluous_coordinates_in_license):
print(coord)
print('')
if coordinates_missing_in_license:
print('Dependencies missing from LICENSE-binary: ')
for coord in sorted(coordinates_missing_in_license):
print(coord)
any_wrong_coordinates = coordinates_missing_in_license or superfluous_coordinates_in_license
if any_wrong_coordinates:
print('LICENSE-binary needs to be updated. Please remove any unnecessary dependencies from LICENSE-binary, and add any that are missing. You can copy any missing dependencies from DEPENDENCY-LICENSES')
return not any_wrong_coordinates
return check_for_errors
with cd(project_root):
parser = argparse.ArgumentParser(description='Validate that the Storm license files are up to date (excluding NOTICE-binary and the licenses/ directory)')
parser.add_argument('--skip-build-storm', action='store_true', help='set to skip building Storm')
args = parser.parse_args()
success = True
if not args.skip_build_storm:
build_storm()
generate_dependency_licenses()
generate_storm_dist_license_report()
license_binary_checker = make_license_binary_checker()
success = check_dependency_licenses() and success
success = license_binary_checker() and success
if not success:
print('Some license files are not up to date, see above for the relevant error message')
exit(1)
print('License files are up to date')
exit(0)