blob: c8489008cae67d62f30c03f0098a9a5082ba015b [file] [log] [blame]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import re
import sys
import tarfile
import tempfile
import subprocess
import argparse
# Regex that extracts dependency tokens from the text of the LICENSE file.
#
# A line matches when it starts (after optional whitespace) with a dash,
# followed by optional whitespace and a token of the form
#   <name>-<version>[<suffix>]
# where:
#   * <name> consists of letters, digits and the characters "_", ".", "+", "-";
#   * <version> has two to four dot-separated numeric components
#     (x.y, x.y.z or x.y.z.w);
#   * <suffix> is optional: a single "-" or "." followed by alphanumerics
#     (e.g. "-alpha").
# Only the token itself (capture group 1) is captured. re.MULTILINE makes "^"
# anchor at the start of every line rather than only at the start of the text.
LICENSE_DEP_PATTERN = re.compile(
    r'^\s*-\s*([A-Za-z0-9_.+-]+-[0-9]+\.[0-9]+(?:\.[0-9]+){0,2}(?:[-.][A-Za-z0-9]+)?)',
    re.MULTILINE
)
def run_gradlew(project_dir):
    """Run the Gradle wrapper's ``clean`` and ``releaseTarGz`` tasks.

    Args:
        project_dir: Directory used as the working directory for the
            ``./gradlew`` invocation.

    Raises:
        subprocess.CalledProcessError: If Gradle exits with a non-zero status
            (the command is run with ``check=True``).
    """
    gradle_command = ["./gradlew", "clean", "releaseTarGz"]
    print("Running './gradlew clean releaseTarGz'")
    subprocess.run(gradle_command, cwd=project_dir, check=True)
def get_tarball_path(project_dir):
    """Return the most recently modified Kafka tarball of the build output.

    Looks in ``<project_dir>/core/build/distributions`` for files named
    ``kafka_2.13-*.tgz`` (case-insensitive) whose name does not contain
    ``docs`` after the prefix, and picks the one with the newest modification
    time.

    Args:
        project_dir: Root directory of the project.

    Returns:
        Path of the selected tarball.

    Prints an error and exits the process with status 1 when the distributions
    directory does not exist or contains no matching tarball.
    """
    dist_dir = os.path.join(project_dir, "core", "build", "distributions")
    if not os.path.isdir(dist_dir):
        print("Error: Distributions directory not found:", dist_dir)
        sys.exit(1)

    tarball_re = re.compile(r'^kafka_2\.13-(?!.*docs).+\.tgz$', re.IGNORECASE)

    newest_path = None
    newest_mtime = None
    for entry in os.listdir(dist_dir):
        if tarball_re.match(entry) is None:
            continue
        entry_path = os.path.join(dist_dir, entry)
        entry_mtime = os.path.getmtime(entry_path)
        # Strict comparison: on equal timestamps the first entry seen wins.
        if newest_path is None or entry_mtime > newest_mtime:
            newest_path = entry_path
            newest_mtime = entry_mtime

    if newest_path is None:
        print("Error: No tarball matching 'kafka_2.13-*.tgz' found in:", dist_dir)
        sys.exit(1)
    return newest_path
def extract_tarball(tarball, extract_dir):
    """Extract a gzip-compressed tarball into ``extract_dir``.

    Args:
        tarball: Path of the ``.tgz`` archive to extract.
        extract_dir: Directory the archive members are extracted into.

    Raises:
        tarfile.TarError: If the archive cannot be read.
        OSError: If the archive cannot be opened or members cannot be written.
    """
    with tarfile.open(tarball, "r:gz") as tar:
        # The ``filter`` argument only exists on Python versions that ship
        # extraction filters; passing it elsewhere raises TypeError. The
        # documented feature check is the presence of ``tarfile.data_filter``.
        if hasattr(tarfile, "data_filter"):
            # "fully_trusted" returns every member unchanged. Passing it
            # explicitly avoids the deprecation warning for a missing filter.
            tar.extractall(path=extract_dir, filter="fully_trusted")
        else:
            tar.extractall(path=extract_dir)
    print("Tarball extracted to:", extract_dir)
def get_libs_set(libs_dir):
    """Collect the names of bundled jars, without the ``.jar`` extension.

    Jars whose file name contains ``kafka``, ``connect`` or ``trogdor``
    (case-insensitive, anywhere in the name) are skipped.

    Args:
        libs_dir: Directory whose entries are scanned.

    Returns:
        A set of jar file names with the trailing ``.jar`` removed.
    """
    jar_suffix = ".jar"
    excluded = re.compile(r"(kafka|connect|trogdor)", re.IGNORECASE)

    jar_names = set()
    for entry in os.listdir(libs_dir):
        if not entry.endswith(jar_suffix):
            continue
        if excluded.search(entry):
            continue
        jar_names.add(entry[:-len(jar_suffix)])
    return jar_names
def get_license_deps(license_text):
    """Return the set of dependency tokens listed in the LICENSE text.

    Args:
        license_text: Full text of the LICENSE file.

    Returns:
        The distinct strings captured by group 1 of ``LICENSE_DEP_PATTERN``
        across all matches in ``license_text``.
    """
    tokens = set()
    for match in LICENSE_DEP_PATTERN.finditer(license_text):
        tokens.add(match.group(1))
    return tokens
def main():
    """Compare the jars bundled in the release tarball with the LICENSE file.

    Steps:
      1. Parse the command line (``--skip-build`` skips the Gradle build).
      2. Unless skipped, build the release tarball in the current directory.
      3. Extract the tarball into a temporary directory.
      4. Print the jar names found under ``libs`` and the dependency tokens
         found in ``LICENSE``, then report the differences in both directions.

    Exits with status 1 when extraction yields nothing, when ``LICENSE`` or
    ``libs`` is missing, or when the two dependency sets differ.
    """
    cli = argparse.ArgumentParser(description="Whether to skip executing ReleaseTarGz.")
    cli.add_argument("--skip-build", action="store_true", help="skip the build")
    options = cli.parse_args()

    # The script is expected to be launched from the project root.
    project_dir = os.getcwd()
    print("Using project directory:", project_dir)

    if not options.skip_build:
        # Build the tarball.
        run_gradlew(project_dir)
    else:
        print("Skip running './gradlew clean releaseTarGz'")

    tarball = get_tarball_path(project_dir)
    print("Tarball created at:", tarball)

    def print_sorted(deps):
        # One bullet per dependency, in sorted order.
        for dep in sorted(deps):
            print(" -", dep)

    # Work inside a temporary directory that is removed on exit.
    with tempfile.TemporaryDirectory() as workspace:
        extract_tarball(tarball, workspace)
        top_level_entries = os.listdir(workspace)
        if not top_level_entries:
            print("Error: No directory found after extraction.")
            sys.exit(1)
        # The first listed entry is taken as the root of the extracted release.
        release_root = os.path.join(workspace, top_level_entries[0])
        print("Tarball extracted to:", release_root)

        # Locate the LICENSE file and libs directory.
        license_path = os.path.join(release_root, "LICENSE")
        libs_dir = os.path.join(release_root, "libs")
        if not (os.path.exists(license_path) and os.path.exists(libs_dir)):
            print("Error: LICENSE file or libs directory not found in the extracted project.")
            sys.exit(1)

        with open(license_path, "r", encoding="utf-8") as license_file:
            license_text = license_file.read()

        # Build both dependency sets.
        libs = get_libs_set(libs_dir)
        license_deps = get_license_deps(license_text)

        print("\nDependencies from libs (extracted from jar names):")
        print_sorted(libs)

        print("\nDependencies extracted from LICENSE file:")
        print_sorted(license_deps)

        # Set differences in both directions.
        missing_in_license = libs - license_deps
        extra_in_license = license_deps - libs

        if not missing_in_license:
            print("\nAll libs from ./libs are present in the LICENSE file.")
        else:
            print("\nThe following libs (from ./libs) are missing in the LICENSE file. These should be added to the LICENSE-binary file:")
            print_sorted(missing_in_license)

        if not extra_in_license:
            print("\nNo extra dependencies in the LICENSE file.")
        else:
            print("\nThe following entries are in the LICENSE file but not present in ./libs. These should be removed from the LICENSE-binary file:")
            print_sorted(extra_in_license)

        # Any mismatch makes the script fail.
        if missing_in_license or extra_in_license:
            sys.exit(1)
# Run the license check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()