| #!/usr/bin/env python3 |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| |
| """ |
| Automates updating LICENSE-binary and NOTICE-binary for Apache Pinot releases. |
| |
| Instead of the manual wiki process (temporarily hacking pinot-distribution/pom.xml, |
| generating HTML reports, copy-pasting from a browser, hacking POMs for shade plugin |
| NOTICE aggregation, etc.), this script: |
| |
| LICENSE-binary: |
| 1. Parses pinot-assembly.xml to find ALL modules shipped in the binary |
| 2. Runs `mvn dependency:list` on each to get the real transitive dependency set |
| 3. Parses the current LICENSE-binary |
| 4. Diffs old vs new dependencies |
| 5. Auto-detects licenses for new deps from Maven POM metadata |
| 6. Generates an updated LICENSE-binary and a human-readable report |
| |
| NOTICE-binary: |
| 7. Opens each dependency JAR in ~/.m2/repository |
| 8. Extracts META-INF/NOTICE files (replacing the shade plugin POM hack) |
| 9. Deduplicates and merges into a new NOTICE-binary |
| |
| Prerequisites: |
| - Build the full project first: |
| mvn clean install -DskipTests -T1C |
| - Python 3.7+ |
| |
| Usage: |
| cd /path/to/pinot |
| python3 scripts/update-release-binaries.py # both files (default) |
| python3 scripts/update-release-binaries.py --license-only # LICENSE-binary only |
| python3 scripts/update-release-binaries.py --notice-only # NOTICE-binary only |
| python3 scripts/update-release-binaries.py --report-only # just show diff |
| """ |
| |
| import argparse |
| import os |
| import re |
| import subprocess |
| import sys |
| import xml.etree.ElementTree as ET |
| import zipfile |
| from collections import OrderedDict |
| from pathlib import Path |
| from typing import Dict, List, Optional, Set, Tuple |
| |
| |
| def _version_key(version: str): |
| """ |
| Convert a version string into a tuple suitable for comparison, |
| so that "10.0" > "9.0" works correctly (unlike lexicographic comparison). |
| Non-numeric segments are compared as strings. |
| """ |
| parts = re.split(r"[.\-]", version) |
| key = [] |
| for p in parts: |
| if p.isdigit(): |
| key.append((0, int(p))) |
| else: |
| key.append((1, p)) |
| return tuple(key) |
| |
| |
| # --------------------------------------------------------------------------- |
| # Data types |
| # --------------------------------------------------------------------------- |
| |
class Dep:
    """A Maven dependency identified by group:artifact:version.

    Two instances are equal (and hash alike) when their
    ``group:artifact:version`` string forms are identical.
    """

    def __init__(self, group_id: str, artifact_id: str, version: str):
        self.group_id = group_id
        self.artifact_id = artifact_id
        self.version = version

    def key(self) -> Tuple[str, str]:
        """Return the version-independent identity ``(group_id, artifact_id)``."""
        return (self.group_id, self.artifact_id)

    def __str__(self):
        return f"{self.group_id}:{self.artifact_id}:{self.version}"

    def __repr__(self):
        return f"Dep({self.group_id!r}, {self.artifact_id!r}, {self.version!r})"

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # reflected comparison on the other operand before falling back to
        # identity; the observable result for unrelated types is still False.
        if not isinstance(other, Dep):
            return NotImplemented
        return str(self) == str(other)

    def __hash__(self):
        # Must stay consistent with __eq__, which compares the string form.
        return hash(str(self))
| |
| |
class LicenseSection:
    """One license heading of LICENSE-binary plus the dependencies listed beneath it."""

    def __init__(self, name: str, preamble_lines: List[str], deps: List[str]):
        # Section title, e.g. "Apache License Version 2.0", "MIT License", "BSD 3-Clause".
        self.name = name
        # Non-dependency lines (header / decoration text) collected for the section.
        self.preamble_lines = preamble_lines
        # Dependency coordinates as "group:artifact:version" strings.
        self.deps = deps

    def __repr__(self):
        dep_count = len(self.deps)
        return f"LicenseSection({self.name!r}, {dep_count} deps)"
| |
| |
| # --------------------------------------------------------------------------- |
| # Known license classification |
| # --------------------------------------------------------------------------- |
| |
# Maps normalized license names/URLs from Maven POMs to LICENSE-binary section names.
# Each entry is (regex, section name). Order matters for matching: first match wins.
# Patterns are searched in text that has already been lower-cased, so every
# pattern must itself be written in lower case or it can never match.
LICENSE_CLASSIFIERS = [
    # Apache 2.0
    (r"apache.*2", "Apache License Version 2.0"),
    (r"asf\s*2", "Apache License Version 2.0"),
    (r"apache\.org/licenses/license-2\.0", "Apache License Version 2.0"),

    # MIT-0 ("MIT No Attribution") — must come before generic MIT, otherwise the
    # word "mit" in its full name is claimed by the plain MIT rule.
    (r"mit-0|mit.0.license|\bmit\s+no\s+attribution", "MIT-0 License"),

    # MIT
    (r"\bmit\b(?!-0)", "MIT License"),
    (r"opensource\.org/licenses/mit\b(?!-0)", "MIT License"),

    # BSD 2-Clause (must come before generic BSD)
    (r"bsd[\s._-]*2|2[\s._-]*clause.*bsd|simplified.*bsd|freebsd", "BSD 2-Clause"),

    # BSD 3-Clause (must come before generic BSD)
    (r"bsd[\s._-]*3|3[\s._-]*clause.*bsd|new.*bsd|revised.*bsd|modified.*bsd", "BSD 3-Clause"),

    # BSD (plain/unspecified) — fallback after specific variants
    (r"\bbsd\b", "BSD"),

    # CDDL 1.0
    (r"cddl.*1\.0|common.*development.*distribution.*1\.0", "Common Development and Distribution License (CDDL) 1.0"),

    # CDDL 1.1
    (r"cddl.*1\.1|common.*development.*distribution.*1\.1", "Common Development and Distribution License (CDDL) 1.1"),

    # EPL 1.0
    (r"eclipse.*public.*1\.0|epl.*1\.0", "Eclipse Public License (EPL) 1.0"),

    # EPL 2.0
    (r"eclipse.*public.*2\.0|epl.*2\.0", "Eclipse Public License (EPL) 2.0"),

    # EDL 1.0
    (r"eclipse.*distribution|edl", "Eclipse Distribution License (EDL) 1.0"),

    # LGPL
    (r"lgpl|lesser.*general.*public", "LGPL"),

    # Bouncy Castle
    (r"bouncy\s*castle", "Bouncy Castle License"),

    # ISC
    (r"\bisc\b", "ISC License"),

    # Go License
    (r"\bgo\b.*license|golang", "The Go License"),

    # WTFPL
    (r"wtfpl|do what the fuck", "WTFPL License"),

    # Public Domain
    (r"public\s*domain", "Public Domain"),
]
| |
| |
def classify_license(license_name: str, license_url: str = "") -> Optional[str]:
    """
    Translate a POM license name and/or URL into a LICENSE-binary section name.

    The name and URL are joined with a space and lower-cased, then tested
    against LICENSE_CLASSIFIERS in order; the section of the first matching
    pattern is returned, or None when nothing matches.
    """
    haystack = "{} {}".format(license_name, license_url).lower()
    return next(
        (
            section_name
            for pattern, section_name in LICENSE_CLASSIFIERS
            if re.search(pattern, haystack)
        ),
        None,
    )
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 1: Parse assembly descriptor for shipped modules |
| # --------------------------------------------------------------------------- |
| |
def parse_assembly_modules(pinot_root: Path) -> List[Path]:
    """
    Collect the module directories that pinot-assembly.xml ships in the binary.

    Every <source> element whose text contains ``${pinot.root}/<module>/target/``
    contributes ``pinot_root/<module>``, provided that directory exists and
    holds a pom.xml. ``pinot-distribution`` is added whenever it exists.
    Prints the discovered modules and returns them sorted. Exits with
    status 1 if the assembly descriptor is missing.
    """
    descriptor = pinot_root / "pinot-distribution" / "pinot-assembly.xml"
    if not descriptor.exists():
        print(f"ERROR: Assembly file not found: {descriptor}", file=sys.stderr)
        sys.exit(1)

    module_in_source = re.compile(r"\$\{pinot\.root\}/(.+?)/target/")
    shipped = set()

    for node in ET.parse(descriptor).getroot().iter():
        # Drop any "{namespace}" prefix so the tag compares as a bare name.
        local_tag = node.tag.rsplit("}", 1)[-1]
        if local_tag != "source" or not node.text:
            continue
        hit = module_in_source.search(node.text.strip())
        if hit is None:
            continue
        candidate = pinot_root / hit.group(1)
        if candidate.exists() and (candidate / "pom.xml").exists():
            shipped.add(candidate)

    # Always include pinot-distribution itself (its deps: pinot-tools, pinot-jdbc-client)
    distribution_dir = pinot_root / "pinot-distribution"
    if distribution_dir.exists():
        shipped.add(distribution_dir)

    ordered = sorted(shipped)
    print(f"Found {len(ordered)} modules in assembly descriptor:")
    for module_dir in ordered:
        print(f" {module_dir.relative_to(pinot_root)}")
    print()
    return ordered
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 2: Get Maven artifact IDs for modules |
| # --------------------------------------------------------------------------- |
| |
def get_maven_artifact_id(module_dir: Path) -> Optional[str]:
    """
    Return the project's own artifactId from ``module_dir/pom.xml``.

    Only a direct child of the root element is considered (so a parent's
    artifactId is not picked up). The Maven-namespaced element is tried
    first, then an un-namespaced one. Returns None when the POM is missing
    or has no such element with text.
    """
    pom_file = module_dir / "pom.xml"
    if not pom_file.exists():
        return None

    project = ET.parse(pom_file).getroot()

    node = project.find("{http://maven.apache.org/POM/4.0.0}artifactId")
    if node is None:
        node = project.find("artifactId")

    if node is None or not node.text:
        return None
    return node.text.strip()
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 3: Run mvn dependency:list |
| # --------------------------------------------------------------------------- |
| |
def _parse_dependency_list_output(output_file: Path) -> Set[Dep]:
    """Parse the file written by `mvn dependency:list` into a set of non-test Deps."""
    deps: Set[Dep] = set()
    with open(output_file) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            parts = line.split(":")
            if len(parts) == 5:
                group_id, artifact_id, _pkg_type, version, scope = parts
            elif len(parts) == 6:
                group_id, artifact_id, _pkg_type, _classifier, version, scope = parts
            else:
                # Headings such as "The following files have been resolved:".
                continue

            # The last field may carry extra text after the scope (for example a
            # "-- module ..." suffix); only its first token is the scope itself.
            scope_token = scope.strip().lower().split(" ", 1)[0]
            if scope_token in ("test", "provided", "system"):
                continue

            deps.add(Dep(group_id.strip(), artifact_id.strip(), version.strip()))
    return deps


def run_dependency_list(pinot_root: Path, module_dirs: List[Path]) -> Set[Dep]:
    """
    Run `mvn dependency:list` on all shipped modules; return the merged dep set.

    Args:
        pinot_root: repository root; Maven is executed from here and its output
            is collected in ``target/license-check-deps.txt`` beneath it.
        module_dirs: module directories whose artifactIds are passed to ``-pl``.

    Returns:
        The set of runtime dependencies outside the ``org.apache.pinot`` group.

    Exits with status 1 (after printing to stderr) when no artifactId can be
    read, Maven cannot be started, times out, fails, or writes no output file.
    """
    artifact_ids = []
    for mod_dir in module_dirs:
        aid = get_maven_artifact_id(mod_dir)
        if aid:
            artifact_ids.append(aid)
        else:
            print(f" WARNING: Could not read artifactId from {mod_dir}/pom.xml",
                  file=sys.stderr)

    if not artifact_ids:
        print("ERROR: No module artifact IDs found", file=sys.stderr)
        sys.exit(1)

    pl_arg = ",".join(f":{aid}" for aid in artifact_ids)

    # Start from a clean file: Maven is told to append (one section per module).
    output_file = pinot_root / "target" / "license-check-deps.txt"
    output_file.parent.mkdir(parents=True, exist_ok=True)
    if output_file.exists():
        output_file.unlink()

    cmd = [
        "mvn", "dependency:list",
        "-pl", pl_arg,
        "-DincludeScope=runtime",
        "-DexcludeGroupIds=org.apache.pinot",
        f"-DoutputFile={output_file}",
        "-DappendOutput=true",
        "-DoutputAbsoluteArtifactFilename=false",
    ]

    print(f"Running mvn dependency:list for {len(artifact_ids)} modules...")
    print(" This may take a few minutes...\n")

    timeout_seconds = 600
    try:
        result = subprocess.run(
            cmd, cwd=pinot_root, capture_output=True, text=True, timeout=timeout_seconds
        )
    except FileNotFoundError as exc:
        # Raised when the executable (or the working directory) does not exist.
        print(f"ERROR: Could not launch 'mvn' (is Maven installed and on PATH?): {exc}",
              file=sys.stderr)
        sys.exit(1)
    except subprocess.TimeoutExpired:
        print(f"ERROR: Maven command timed out after {timeout_seconds} seconds.",
              file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        print("ERROR: Maven command failed.", file=sys.stderr)
        # Show only the tail of each stream; slicing is a no-op on short output.
        stderr_tail = result.stderr[-3000:]
        stdout_tail = result.stdout[-3000:]
        if stderr_tail.strip():
            print(f"STDERR:\n{stderr_tail}", file=sys.stderr)
        if stdout_tail.strip():
            print(f"STDOUT:\n{stdout_tail}", file=sys.stderr)
        sys.exit(1)

    if not output_file.exists():
        print("ERROR: Maven did not produce output file", file=sys.stderr)
        sys.exit(1)

    deps = _parse_dependency_list_output(output_file)

    print(f"Found {len(deps)} unique non-Pinot dependencies from Maven\n")
    return deps
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 4: Parse current LICENSE-binary |
| # --------------------------------------------------------------------------- |
| |
# Regex matching a dependency line: group.id:artifact-id:version
# Group and artifact must start with an ASCII letter and may contain letters,
# digits, ".", "_" and "-"; the version may additionally contain "+".
# The pattern is anchored at both ends, so it only matches text that has had
# surrounding whitespace stripped.
DEP_LINE_RE = re.compile(
    r"^[a-zA-Z][a-zA-Z0-9._-]*:[a-zA-Z][a-zA-Z0-9._-]*:[a-zA-Z0-9._+-]+$"
)
| |
| |
def parse_license_binary(license_file: Path) -> Tuple[List[str], List[LicenseSection], Dict[str, str]]:
    """
    Parse LICENSE-binary into:
    - file_lines: all original lines (for reconstruction)
    - sections: list of LicenseSection objects
    - dep_to_section: maps "group:artifact:version" -> section name

    Layout assumed by the parser: the Apache license text ends with
    "END OF TERMS AND CONDITIONS"; the first line of 20+ dashes after it opens
    the Apache 2.0 dependency list, and the next such line closes it. After
    that, every line of 3+ dashes is treated as the underline of a section
    header, the header being the nearest non-blank line above it.

    A section's preamble_lines hold only its own non-dependency lines; the
    header line of the *following* section is not included.

    Exits with status 1 if the end-of-terms marker or first separator is missing.

    Returns (file_lines, sections, dep_to_section)
    """
    with open(license_file) as f:
        lines = f.read().split("\n")

    sections: List[LicenseSection] = []
    dep_to_section: Dict[str, str] = {}

    def add_section(name: str, preamble: List[str], deps: List[str]) -> None:
        sections.append(LicenseSection(
            name=name,
            preamble_lines=preamble,
            deps=sorted(deps),
        ))
        for d in deps:
            dep_to_section[d] = name

    # --- Identify the Apache 2.0 section ---
    license_text_end = next(
        (i for i, line in enumerate(lines) if "END OF TERMS AND CONDITIONS" in line),
        None,
    )
    if license_text_end is None:
        print("ERROR: Cannot find 'END OF TERMS AND CONDITIONS' in LICENSE-binary",
              file=sys.stderr)
        sys.exit(1)

    first_sep = next(
        (i for i in range(license_text_end, len(lines))
         if re.match(r"^-{20,}$", lines[i])),
        None,
    )
    if first_sep is None:
        print("ERROR: Cannot find first separator in LICENSE-binary", file=sys.stderr)
        sys.exit(1)

    # The second separator (if any) ends the Apache 2.0 section.
    second_sep = next(
        (i for i in range(first_sep + 1, len(lines))
         if re.match(r"^-{20,}$", lines[i])),
        None,
    )

    apache_preamble: List[str] = []
    apache_deps: List[str] = []
    apache_end = second_sep if second_sep else len(lines)
    for i in range(first_sep + 1, apache_end):
        stripped = lines[i].strip()
        if DEP_LINE_RE.match(stripped):
            apache_deps.append(stripped)
        elif stripped:
            apache_preamble.append(lines[i])
    add_section("Apache License Version 2.0", apache_preamble, apache_deps)

    # --- Parse remaining sections ---
    if second_sep is None:
        return lines, sections, dep_to_section

    current_section_name: Optional[str] = None
    current_preamble: List[str] = []
    current_deps: List[str] = []
    # Index in `lines` of the most recent entry appended to current_preamble.
    last_preamble_idx: Optional[int] = None

    for i in range(second_sep + 1, len(lines)):
        line = lines[i]
        stripped = line.strip()

        # A dash underline means the nearest non-blank line above is a header.
        if re.match(r"^-{3,}$", stripped):
            header_idx = None
            for j in range(i - 1, second_sep, -1):
                if lines[j].strip():
                    header_idx = j
                    break
            header_line = lines[header_idx].strip() if header_idx is not None else ""

            if current_section_name is not None:
                # The header was read before its underline and was therefore
                # filed as preamble of the section being closed; take it back
                # out so it does not make that section look non-empty.
                if header_idx is not None and header_idx == last_preamble_idx:
                    current_preamble.pop()
                add_section(current_section_name, current_preamble, current_deps)

            current_section_name = header_line
            current_preamble = []
            current_deps = []
            last_preamble_idx = None
            continue

        # Lines before the first header (the intro paragraph) are ignored.
        if current_section_name is not None:
            if DEP_LINE_RE.match(stripped):
                current_deps.append(stripped)
            elif stripped:
                current_preamble.append(line)
                last_preamble_idx = i

    # Don't forget the last section
    if current_section_name is not None and (current_deps or current_preamble):
        add_section(current_section_name, current_preamble, current_deps)

    return lines, sections, dep_to_section
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 5: Compute diff |
| # --------------------------------------------------------------------------- |
| |
def compute_diff(
    old_sections: List[LicenseSection],
    new_deps: Set[Dep],
) -> Tuple[List[Dep], List[str], List[Tuple[str, str, str]]]:
    """
    Compare old LICENSE-binary deps with new Maven deps.

    Dependencies are matched on (group, artifact). If the old file lists the
    same (group, artifact) more than once, the entry seen last wins; if Maven
    reports several versions, the highest one (per _version_key) wins.

    Returns:
    - added: list of Dep objects not in old
    - removed: list of "group:artifact:version" strings not in new
    - changed: list of (group:artifact, old_version, new_version) tuples

    All three lists are sorted by (group, artifact).
    """
    # (group, artifact) -> version, from the existing file. Entries that are
    # not exactly three colon-separated fields are ignored.
    old_by_key: Dict[Tuple[str, str], str] = {}
    for section in old_sections:
        for dep_str in section.deps:
            parts = dep_str.split(":")
            if len(parts) == 3:
                group, artifact, version = parts
                old_by_key[(group, artifact)] = version

    # (group, artifact) -> version, from Maven.
    new_by_key: Dict[Tuple[str, str], str] = {}
    for dep in new_deps:
        key = dep.key()
        # If multiple versions exist (shouldn't happen but just in case), pick the latest
        if key not in new_by_key or _version_key(dep.version) > _version_key(new_by_key[key]):
            new_by_key[key] = dep.version

    old_keys = set(old_by_key)
    new_keys = set(new_by_key)

    added = [
        Dep(group, artifact, new_by_key[(group, artifact)])
        for group, artifact in sorted(new_keys - old_keys)
    ]
    removed = [
        f"{group}:{artifact}:{old_by_key[(group, artifact)]}"
        for group, artifact in sorted(old_keys - new_keys)
    ]
    changed = [
        (f"{key[0]}:{key[1]}", old_by_key[key], new_by_key[key])
        for key in sorted(old_keys & new_keys)
        if old_by_key[key] != new_by_key[key]
    ]

    return added, removed, changed
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 6: Detect licenses for deps (new and version-bumped) |
| # --------------------------------------------------------------------------- |
| |
def detect_license_from_pom(dep: Dep, m2_repo: Path) -> Optional[str]:
    """
    Determine a dependency's license section from its POM in the local Maven repo.

    Reads ``<m2_repo>/<group path>/<artifact>/<version>/<artifact>-<version>.pom``
    and inspects the <licenses> element directly under the root. Each license
    name is classified together with *all* declared URLs; if no name
    classifies, each URL is tried alone. Returns the section name, or
    "UNKNOWN (<first name>)" when names exist but none classify, or None when
    the POM is missing, unparseable, or declares no usable license.
    """
    pom_ns = "{http://maven.apache.org/POM/4.0.0}"

    def child(parent, tag):
        """Find a direct child by tag: namespaced first, then un-namespaced."""
        found = parent.find(f"{pom_ns}{tag}")
        return found if found is not None else parent.find(tag)

    version_dir = m2_repo / dep.group_id.replace(".", "/") / dep.artifact_id / dep.version
    pom_path = version_dir / f"{dep.artifact_id}-{dep.version}.pom"
    if not pom_path.exists():
        return None

    try:
        project = ET.parse(pom_path).getroot()
    except ET.ParseError:
        return None

    licenses_elem = child(project, "licenses")
    if licenses_elem is None:
        return None

    # Collect every declared license name and URL.
    names: List[str] = []
    urls: List[str] = []
    for entry in licenses_elem:
        if entry.tag.split("}")[-1] != "license":
            continue
        for tag, bucket in (("name", names), ("url", urls)):
            node = child(entry, tag)
            if node is not None and node.text:
                bucket.append(node.text.strip())

    # Names first, each one combined with the full list of URLs.
    all_urls = " ".join(urls)
    for name in names:
        section = classify_license(name, all_urls)
        if section:
            return section

    # Then each URL on its own.
    for url in urls:
        section = classify_license("", url)
        if section:
            return section

    # Surface the raw name so a human can classify it.
    if names:
        return f"UNKNOWN ({names[0]})"
    return None
| |
| |
def check_version_bump_license_changes(
    changed: List[Tuple[str, str, str]],
    dep_to_section: Dict[str, str],
    m2_repo: Path,
) -> Tuple[List[Tuple[str, str, str]], List[Tuple[str, str, str, str, str]]]:
    """
    Split version bumps into those that keep their license section and those that do not.

    For each (group:artifact, old_ver, new_ver) the license of the NEW version
    is detected from its POM and compared with the section the OLD version is
    listed under. A bump counts as a license change only when both are known,
    the detected license is not an "UNKNOWN ..." value, and the two differ.

    Returns:
    - safe_changed: version bumps where the license stayed the same (or could
      not be determined)
    - license_changed: tuples of (group:artifact, old_ver, new_ver, old_section, new_license)
    """
    safe_changed = []
    license_changed = []

    for ga, old_ver, new_ver in changed:
        coords = ga.split(":")
        if len(coords) == 2:
            previous_section = dep_to_section.get(f"{ga}:{old_ver}")
            detected = detect_license_from_pom(Dep(coords[0], coords[1], new_ver), m2_repo)

            moved = (
                bool(detected)
                and bool(previous_section)
                and not detected.startswith("UNKNOWN")
                and detected != previous_section
            )
            if moved:
                license_changed.append((ga, old_ver, new_ver, previous_section, detected))
                continue

        # Malformed key, or nothing that proves the license moved.
        safe_changed.append((ga, old_ver, new_ver))

    return safe_changed, license_changed
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 6b: Detect orphaned license files in licenses-binary/ |
| # --------------------------------------------------------------------------- |
| |
def find_orphaned_license_files(
    pinot_root: Path,
    sections: List[LicenseSection],
    removed: List[str],
    added: List[Dep],
    changed: List[Tuple[str, str, str]],
    license_changed: List[Tuple[str, str, str, str, str]],
    license_map: Dict[str, str],
) -> Tuple[List[str], List[str]]:
    """
    After applying all changes, check which license sections will be empty
    and which license files in licenses-binary/ are no longer referenced.

    Args:
        pinot_root: repository root containing LICENSE-binary and licenses-binary/.
        sections: sections parsed from the current LICENSE-binary.
        removed: "group:artifact:version" strings being dropped.
        added: new dependencies being introduced.
        changed: plain version bumps (accepted for interface symmetry; a bump
            does not alter a section's dep count, so it is not used here).
        license_changed: (group:artifact, old_ver, new_ver, old_section, new_license).
        license_map: "group:artifact:version" -> detected section name.

    Returns:
    - empty_sections: section names that will have 0 deps after updates
    - orphaned_files: filenames in licenses-binary/ not referenced by LICENSE-binary
    """
    licenses_dir = pinot_root / "licenses-binary"

    # Everything that disappears from its current section: outright removals
    # plus deps whose license changed (they'll move to a different section).
    removed_set = set(removed)
    for ga, old_ver, _new_ver, _old_section, _new_license in license_changed:
        removed_set.add(f"{ga}:{old_ver}")

    section_dep_counts: Dict[str, int] = {
        section.name: sum(1 for d in section.deps if d not in removed_set)
        for section in sections
    }

    # Account for new deps being added to sections
    for dep in added:
        lic = license_map.get(str(dep))
        if lic and not lic.startswith("UNKNOWN"):
            section_dep_counts[lic] = section_dep_counts.get(lic, 0) + 1

    # Account for license-changed deps moving to new sections
    for _ga, _old_ver, _new_ver, _old_section, new_license in license_changed:
        section_dep_counts[new_license] = section_dep_counts.get(new_license, 0) + 1

    # Sections that have preamble content (e.g., "(see licenses/...)") are kept even
    # with zero deps — they serve as license-text placeholders for their dep category.
    sections_with_preamble = {s.name for s in sections if s.preamble_lines}
    empty_sections = [
        name for name, count in section_dep_counts.items()
        if count == 0
        and name != "Apache License Version 2.0"  # never remove the main license
        and name not in sections_with_preamble  # keep sections that reference license files
    ]

    # Find license files referenced in LICENSE-binary, written either as
    # "licenses/<file>" or with the source directory name "licenses-binary/<file>".
    referenced_files: Set[str] = set()
    license_file_path = pinot_root / "LICENSE-binary"
    if license_file_path.exists():
        with open(license_file_path) as f:
            content = f.read()
        for match in re.finditer(r"licenses(?:-binary)?/([A-Za-z0-9._-]+)", content):
            # "." is a legal filename character, so a reference that ends a
            # sentence would otherwise capture the full stop as well.
            filename = match.group(1).rstrip(".")
            if filename:
                referenced_files.add(filename)

    # Check which files in licenses-binary/ are not referenced
    orphaned_files: List[str] = []
    if licenses_dir.exists():
        orphaned_files = [
            entry.name
            for entry in sorted(licenses_dir.iterdir())
            if entry.is_file() and entry.name not in referenced_files
        ]

    return empty_sections, orphaned_files
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 7: Generate report |
| # --------------------------------------------------------------------------- |
| |
def print_report(
    added: List[Dep],
    removed: List[str],
    changed: List[Tuple[str, str, str]],
    license_changed: List[Tuple[str, str, str, str, str]],
    license_map: Dict[str, str],
    empty_sections: List[str],
    orphaned_files: List[str],
):
    """
    Write the LICENSE-binary update report to stdout.

    After a banner and a count summary, one block is printed per non-empty
    category, in this order: removed, version changed, license changed,
    added (grouped by detected license, unclassifiable deps last), empty
    sections, orphaned license files. If every category is empty, a single
    "no changes" line is printed instead.
    """
    double_rule = "=" * 78
    single_rule = "-" * 78

    def heading(title: str) -> None:
        print(single_rule)
        print(title)
        print(single_rule)

    print(double_rule)
    print("LICENSE-BINARY UPDATE REPORT")
    print(double_rule)

    if not any((added, removed, changed, license_changed, empty_sections, orphaned_files)):
        print("\nNo changes detected. LICENSE-binary is up to date.")
        return

    # Summary
    summary = (
        ("\n Added:", added),
        (" Removed:", removed),
        (" Version changed:", changed),
        (" License changed:", license_changed),
        (" Empty sections:", empty_sections),
        (" Orphaned files:", orphaned_files),
    )
    for label, items in summary:
        print(f"{label} {len(items)}")
    print()

    if removed:
        heading("REMOVED (delete these lines from LICENSE-binary):")
        for dep_str in removed:
            print(f" - {dep_str}")
        print()

    # Version bumps whose license section is unchanged.
    if changed:
        heading("VERSION CHANGED (update version numbers — license unchanged):")
        for ga, old_v, new_v in changed:
            print(f" {ga}")
            print(f" {old_v} --> {new_v}")
        print()

    # Version bumps that must also move to another section.
    if license_changed:
        heading("LICENSE CHANGED ON VERSION BUMP (must move to different section):")
        for ga, old_v, new_v, old_section, new_license in license_changed:
            print(f" {ga}")
            print(f" {old_v} --> {new_v}")
            print(f" MOVE: [{old_section}] --> [{new_license}]")
        print()

    if added:
        heading("ADDED (new deps to add to LICENSE-binary):")

        classified: Dict[str, List[Dep]] = OrderedDict()
        unresolved = []
        for dep in added:
            lic = license_map.get(str(dep))
            if lic and not lic.startswith("UNKNOWN"):
                classified.setdefault(lic, []).append(dep)
            else:
                unresolved.append((dep, lic))

        # Sections treated as already existing are the classified values found
        # in license_map.
        # NOTE(review): whether license_map also covers pre-existing deps is
        # decided by the caller — confirm the "NEW SECTION" marker can fire.
        known_sections = {
            section for section in license_map.values()
            if section and not section.startswith("UNKNOWN")
        }

        for lic_name in sorted(classified):
            marker = "" if lic_name in known_sections else " ** NEW SECTION NEEDED **"
            print(f"\n [{lic_name}]{marker}")
            for dep in sorted(classified[lic_name], key=str):
                print(f" + {dep}")

        if unresolved:
            print("\n [REQUIRES MANUAL LICENSE LOOKUP]")
            print(" (Visit the project's website to determine the license,")
            print(" then add to the appropriate section. If it's a new license")
            print(" type, create a new file in licenses-binary/ and a new section.)")
            for dep, raw_lic in sorted(unresolved, key=lambda pair: str(pair[0])):
                hint = f" (POM says: {raw_lic})" if raw_lic else ""
                print(f" ? {dep}{hint}")
        print()

    if empty_sections:
        heading("EMPTY SECTIONS (consider removing from LICENSE-binary):")
        for section_name in sorted(empty_sections):
            print(f" - {section_name}")
        print()

    if orphaned_files:
        heading("ORPHANED LICENSE FILES (not referenced in LICENSE-binary — consider deleting):")
        for filename in orphaned_files:
            print(f" - licenses-binary/{filename}")
        print()
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 8: Generate updated LICENSE-binary |
| # --------------------------------------------------------------------------- |
| |
| def generate_updated_license_binary( |
| license_file: Path, |
| sections: List[LicenseSection], |
| added: List[Dep], |
| removed: List[str], |
| changed: List[Tuple[str, str, str]], |
| license_changed: List[Tuple[str, str, str, str, str]], |
| license_map: Dict[str, str], |
| output_file: Path, |
| ): |
| """Generate an updated LICENSE-binary file.""" |
| # Read original file |
| with open(license_file) as f: |
| original = f.read() |
| |
| # Build removed set and change map for quick lookup |
| removed_set = set(removed) |
| change_map = {} # "group:artifact" -> new_version |
| for ga, _old_v, new_v in changed: |
| change_map[ga] = new_v |
| |
| # License-changed deps: remove old version from old section, add new version to new section |
| for ga, old_ver, new_ver, _old_section, new_license in license_changed: |
| removed_set.add(f"{ga}:{old_ver}") |
| |
| # Build a map from section name -> deps to add |
| added_by_section: Dict[str, List[str]] = {} |
| unclassified: List[Dep] = [] |
| for dep in added: |
| dep_str = str(dep) |
| lic = license_map.get(dep_str) |
| if lic and not lic.startswith("UNKNOWN"): |
| added_by_section.setdefault(lic, []).append(dep_str) |
| else: |
| unclassified.append(dep) |
| |
| # Also add license-changed deps to their new sections |
| for ga, _old_ver, new_ver, _old_section, new_license in license_changed: |
| added_by_section.setdefault(new_license, []).append(f"{ga}:{new_ver}") |
| |
| # Process the file line by line |
| lines = original.split("\n") |
| output_lines = [] |
| current_section_name = None |
| |
| # Track section boundaries so we can insert new deps at end of section |
| # We'll buffer dep lines per section and flush them sorted |
| in_dep_block = False |
| dep_buffer: List[str] = [] |
| section_for_buffer: Optional[str] = None |
| |
| def flush_dep_buffer(): |
| """Sort and write buffered deps, including any additions for this section.""" |
| nonlocal dep_buffer, section_for_buffer |
| if section_for_buffer and section_for_buffer in added_by_section: |
| dep_buffer.extend(added_by_section.pop(section_for_buffer)) |
| dep_buffer.sort() |
| for d in dep_buffer: |
| output_lines.append(d) |
| dep_buffer = [] |
| section_for_buffer = None |
| |
| i = 0 |
| # We need to track which section we're in based on the parsed sections |
| section_names = {s.name for s in sections} |
| |
| while i < len(lines): |
| line = lines[i] |
| stripped = line.strip() |
| |
| # Detect section header (line of dashes following a non-empty line) |
| if re.match(r"^-{3,}$", stripped) and i > 0: |
| # Check if previous non-blank line is a section name |
| prev_text = "" |
| for j in range(i - 1, max(i - 5, -1), -1): |
| if lines[j].strip(): |
| prev_text = lines[j].strip() |
| break |
| |
| if prev_text in section_names: |
| # We're entering a new section; flush previous section's deps |
| if in_dep_block: |
| flush_dep_buffer() |
| in_dep_block = False |
| |
| current_section_name = prev_text |
| output_lines.append(line) |
| i += 1 |
| in_dep_block = True |
| section_for_buffer = current_section_name |
| continue |
| |
| # Special case: Apache 2.0 section starts after first separator |
| # Detect by "This project bundles" text |
| if "This project bundles some components" in stripped: |
| if in_dep_block: |
| flush_dep_buffer() |
| current_section_name = "Apache License Version 2.0" |
| in_dep_block = True |
| section_for_buffer = current_section_name |
| output_lines.append(line) |
| i += 1 |
| continue |
| |
| # If we hit a separator line while in a dep block, flush and exit block |
| if re.match(r"^-{20,}$", stripped) and in_dep_block: |
| flush_dep_buffer() |
| in_dep_block = False |
| current_section_name = None |
| output_lines.append(line) |
| i += 1 |
| continue |
| |
| # Process dep lines |
| if in_dep_block and DEP_LINE_RE.match(stripped): |
| # Check if removed |
| if stripped in removed_set: |
| i += 1 |
| continue # skip this line |
| |
| # Check if version changed |
| parts = stripped.split(":") |
| if len(parts) == 3: |
| ga = f"{parts[0]}:{parts[1]}" |
| if ga in change_map: |
| new_dep_str = f"{ga}:{change_map[ga]}" |
| dep_buffer.append(new_dep_str) |
| i += 1 |
| continue |
| |
| dep_buffer.append(stripped) |
| i += 1 |
| continue |
| |
| # If we were in a dep block and hit a non-dep, non-blank line |
| # that's not a section header, this might be a note line (e.g., "(see licenses/...)") |
| # or we've exited the dep area |
| if in_dep_block and stripped and not DEP_LINE_RE.match(stripped): |
| # Check if this could be a section header (next line might be dashes) |
| if i + 1 < len(lines) and re.match(r"^-{3,}$", lines[i + 1].strip()): |
| # It's a new section header; flush current deps |
| flush_dep_buffer() |
| in_dep_block = False |
| current_section_name = None |
| output_lines.append(line) |
| i += 1 |
| continue |
| else: |
| # It's a note/preamble line within the section; flush deps first |
| # then add this line |
| if dep_buffer: |
| flush_dep_buffer() |
| in_dep_block = False |
| output_lines.append(line) |
| i += 1 |
| continue |
| |
| # Skip blank lines that trail deps in a section — the post-processor |
| # re-inserts proper spacing between sections. Without this, blank |
| # lines from the original file (between the last dep and the next |
| # section header) end up before the flushed deps, creating |
| # double-spacing. We only skip when the buffer already has deps; |
| # blank lines before any deps (e.g., after a preamble like |
| # "(see licenses/...)") are preserved. |
| if in_dep_block and not stripped and dep_buffer: |
| i += 1 |
| continue |
| |
| # Default: pass through |
| output_lines.append(line) |
| i += 1 |
| |
| # Final flush |
| if in_dep_block: |
| flush_dep_buffer() |
| |
| # Handle unclassified deps: add a TODO section at the end |
| if unclassified or added_by_section: |
| output_lines.append("") |
| if added_by_section: |
| # Sections that weren't matched to existing sections in the file |
| for section_name, deps in sorted(added_by_section.items()): |
| output_lines.append("") |
| output_lines.append(f"TODO: Add to [{section_name}] section:") |
| for d in sorted(deps): |
| output_lines.append(d) |
| |
| if unclassified: |
| output_lines.append("") |
| output_lines.append("TODO: The following new deps need manual license lookup:") |
| for dep in sorted(unclassified, key=str): |
| output_lines.append(f" {dep}") |
| |
| # Post-process: ensure blank line separators between sections. |
| # A section boundary is a non-blank, non-dep line followed by a line of dashes. |
| # We need at least 2 blank lines before each section header. |
| final_lines: List[str] = [] |
| for idx, line in enumerate(output_lines): |
| is_section_header = ( |
| line.strip() |
| and not DEP_LINE_RE.match(line.strip()) |
| and idx + 1 < len(output_lines) |
| and re.match(r"^-{3,}$", output_lines[idx + 1].strip()) |
| ) |
| if is_section_header and final_lines: |
| # Count existing trailing blank lines |
| trailing_blanks = 0 |
| for prev in reversed(final_lines): |
| if prev.strip() == "": |
| trailing_blanks += 1 |
| else: |
| break |
| # Ensure at least 2 blank lines before the section header |
| while trailing_blanks < 2: |
| final_lines.append("") |
| trailing_blanks += 1 |
| final_lines.append(line) |
| output_lines = final_lines |
| |
| content = "\n".join(output_lines) |
| # Clean up excessive blank lines (more than 2 consecutive) |
| content = re.sub(r"\n{4,}", "\n\n\n", content) |
| # Ensure trailing newline |
| if not content.endswith("\n"): |
| content += "\n" |
| |
| with open(output_file, "w") as f: |
| f.write(content) |
| |
| print(f"Updated LICENSE-binary written to: {output_file}") |
| |
| |
| # --------------------------------------------------------------------------- |
| # Step 9: Generate NOTICE-binary |
| # --------------------------------------------------------------------------- |
| |
# Fallback NOTICE-binary header, returned by _read_notice_header() when no
# existing NOTICE-binary file is available and used directly by
# generate_notice_binary() when no existing file path is supplied.
# It is seven lines long, matching the default ``num_lines`` that
# _read_notice_header() reads from an existing file.
# NOTE(review): the "in this case for" line looks like it is missing the
# project name that normally follows it -- confirm against NOTICE-binary.
_DEFAULT_NOTICE_HEADER = """\
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

// ------------------------------------------------------------------
// NOTICE file corresponding to the section 4d of The Apache License,
// Version 2.0, in this case for
// ------------------------------------------------------------------
"""
| |
| |
| def _read_notice_header(notice_file: Path, num_lines: int = 7) -> str: |
| """Read the header from an existing NOTICE-binary file. |
| |
| Falls back to a built-in default if the file doesn't exist. |
| """ |
| if not notice_file.exists(): |
| return _DEFAULT_NOTICE_HEADER |
| with open(notice_file) as f: |
| lines = [f.readline() for _ in range(num_lines)] |
| return "".join(lines) |
| |
| |
def extract_notices_from_jars(deps: Set[Dep], m2_repo: Path) -> List[Tuple[str, str]]:
    """
    Collect the NOTICE text shipped inside each dependency's JAR.

    For every dependency the JAR is looked up in the local Maven repository
    at ``<group path>/<artifact>/<version>/<artifact>-<version>.jar``.
    Dependencies whose JAR is not present there are skipped. Inside the JAR
    the first entry named META-INF/NOTICE, META-INF/NOTICE.txt or
    META-INF/NOTICE.md (compared case-insensitively) is read and decoded as
    UTF-8, replacing undecodable bytes.

    Args:
        deps: Dependencies to inspect; processed in ``str(dep)`` order.
        m2_repo: Root of the local Maven repository (e.g. ~/.m2/repository).

    Returns:
        List of (dep_str, notice_text) tuples for the JARs that contain a
        non-empty NOTICE. The text is whitespace-stripped but otherwise
        unmodified (no boilerplate filtering happens here).
    """
    notice_names = (
        "meta-inf/notice",
        "meta-inf/notice.txt",
        "meta-inf/notice.md",
    )
    notices: List[Tuple[str, str]] = []
    checked = 0

    for dep in sorted(deps, key=str):
        group_path = dep.group_id.replace(".", "/")
        jar_path = (
            m2_repo / group_path / dep.artifact_id / dep.version
            / f"{dep.artifact_id}-{dep.version}.jar"
        )
        if not jar_path.exists():
            continue

        checked += 1
        try:
            with zipfile.ZipFile(jar_path, "r") as zf:
                # Only the first matching entry is used, in archive order.
                entry = next(
                    (name for name in zf.namelist() if name.lower() in notice_names),
                    None,
                )
                if entry is None:
                    continue
                notice_text = zf.read(entry).decode("utf-8", errors="replace")
        except (zipfile.BadZipFile, KeyError, OSError) as exc:
            # Stay best-effort, but do not hide the problem: a silently
            # skipped JAR means its NOTICE is missing from the output.
            print(f" WARNING: could not read NOTICE from {jar_path}: {exc}",
                  file=sys.stderr)
            continue

        notice_text = notice_text.strip()
        if notice_text:
            notices.append((str(dep), notice_text))

    print(f" Scanned {checked} JARs, found {len(notices)} with NOTICE files")
    return notices
| |
| |
def strip_asf_boilerplate(text: str) -> str:
    """
    Remove the standard Apache Software Foundation attribution from a NOTICE.

    Two layouts are recognised (each line is compared after stripping
    surrounding whitespace):

      - the two-line form: a line that is exactly
        "This product includes software developed at" (or "... by"),
        followed by a line starting with "The Apache Software Foundation"
      - the one-line form: "This product includes software developed at/by
        The Apache Software Foundation ..." on a single line

    Blank lines directly after a removed attribution are dropped as well.
    All other lines are kept verbatim, and the joined result is stripped of
    leading/trailing whitespace.
    """
    head_only = r"^This product includes software developed (at|by)$"
    asf_line = r"^The Apache Software Foundation"
    one_liner = (
        r"^This product includes software developed (at|by)\s+"
        r"The Apache Software Foundation"
    )

    rows = text.split("\n")
    total = len(rows)
    kept = []
    pos = 0
    while pos < total:
        current = rows[pos].strip()

        # How many lines of boilerplate start at this position (0 = none).
        consumed = 0
        if (
            re.match(head_only, current)
            and pos + 1 < total
            and re.match(asf_line, rows[pos + 1].strip())
        ):
            consumed = 2
        elif re.match(one_liner, current):
            consumed = 1

        if consumed == 0:
            kept.append(rows[pos])
            pos += 1
            continue

        pos += consumed
        # Swallow the blank lines that trailed the attribution.
        while pos < total and not rows[pos].strip():
            pos += 1

    return "\n".join(kept).strip()
| |
| |
def deduplicate_notices(notices: List[Tuple[str, str]]) -> str:
    """
    Deduplicate and concatenate NOTICE texts.

    Each notice first has the ASF boilerplate removed via
    strip_asf_boilerplate(); notices that become empty are dropped. Two
    strategies are then applied:
      1. Exact dedup after whitespace normalization (catches identical copies)
      2. Title-based dedup: if two notices share the same "title" (first
         line containing a letter or digit, with leading '#' removed, e.g.
         "# Jackson JSON processor"), keep only the longest version (catches
         Jackson-style near-duplicates where each module ships a slightly
         different variant)

    Args:
        notices: (dep_str, notice_text) tuples, in the order they should
            appear in the output.

    Returns:
        The surviving notice texts joined by blank lines, in first-seen
        order of their titles.
    """
    seen_normalized: Set[str] = set()
    by_title: Dict[str, Tuple[str, str]] = OrderedDict()  # title -> (dep, longest_text)

    for dep_str, raw_text in notices:
        text = strip_asf_boilerplate(raw_text)
        if not text:
            continue

        # Exact dedup
        normalized = re.sub(r"\s+", " ", text)
        if normalized in seen_normalized:
            continue
        seen_normalized.add(normalized)

        # Title-based dedup. Purely decorative lines ("=====", "-----",
        # "*****") are not titles: treating them as one would merge every
        # notice that happens to open with the same rule line and silently
        # drop all but the longest, so skip ahead to the first line that
        # contains real text.
        # NOTE(review): unrelated notices that share an identical first
        # textual line are still merged by this heuristic -- review output.
        title = ""
        for line in text.split("\n"):
            candidate = line.strip().lstrip("#").strip()
            if any(ch.isalnum() for ch in candidate):
                title = candidate
                break

        if title and title in by_title:
            # Keep the longer version (more complete notice)
            existing_text = by_title[title][1]
            if len(text) > len(existing_text):
                by_title[title] = (dep_str, text)
        else:
            # Notices without any usable title are keyed by their dependency.
            key = title if title else dep_str
            by_title[key] = (dep_str, text)

    return "\n\n".join(text for _, text in by_title.values())
| |
| |
def generate_notice_binary(
    deps: Set[Dep],
    m2_repo: Path,
    output_file: Path,
    existing_notice_file: Optional[Path] = None,
):
    """Generate NOTICE-binary by extracting and merging NOTICE files from dependency JARs.

    Args:
        deps: Dependencies whose JARs are scanned for NOTICE files.
        m2_repo: Root of the local Maven repository holding the JARs.
        output_file: Destination path for the generated NOTICE-binary.
        existing_notice_file: Optional current NOTICE-binary whose header is
            reused; when omitted the built-in default header is used.

    Returns:
        None. Nothing is written when no dependency JAR yields a NOTICE; a
        warning is printed instead.
    """
    print("\nGenerating NOTICE-binary...")
    notices = extract_notices_from_jars(deps, m2_repo)

    if not notices:
        print(" WARNING: No NOTICE files found in any dependency JARs.")
        print(" Make sure the project has been built (mvn install) first.")
        return

    if existing_notice_file:
        header = _read_notice_header(existing_notice_file)
    else:
        header = _DEFAULT_NOTICE_HEADER
    merged = deduplicate_notices(notices)
    content = header + "\n" + merged + "\n"

    # Write UTF-8 explicitly: notices are decoded from the JARs as UTF-8 and
    # commonly contain non-ASCII characters (copyright signs, replacement
    # characters), which a locale-default encoding may be unable to encode.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(content)

    print(f" NOTICE-binary written to: {output_file}")
    # "unique" counts distinct raw notice texts before boilerplate stripping.
    print(f" Contains notices from {len(notices)} dependencies "
          f"({len(set(t for _, t in notices))} unique)")
| |
| |
| # --------------------------------------------------------------------------- |
| # Main |
| # --------------------------------------------------------------------------- |
| |
def main():
    """Command-line entry point.

    Parses the CLI options, locates the Pinot checkout, obtains the shipped
    dependency set (from Maven, or from a file given via --skip-maven), then
    updates LICENSE-binary and/or NOTICE-binary depending on the selected
    scope. With --report-only the diff report is printed but no file is
    written. Exits with status 1 when the repository root or its
    LICENSE-binary cannot be found.
    """
    parser = argparse.ArgumentParser(
        description="Update LICENSE-binary and NOTICE-binary for Apache Pinot releases"
    )
    parser.add_argument(
        "--pinot-root", type=Path, default=None,
        help="Path to pinot repository root (auto-detected if not set)",
    )
    parser.add_argument(
        "--report-only", action="store_true",
        help="Only print the diff report; don't generate updated files",
    )

    # --license-only and --notice-only cannot be combined.
    scope_group = parser.add_mutually_exclusive_group()
    scope_group.add_argument(
        "--license-only", action="store_true",
        help="Only update LICENSE-binary (skip NOTICE-binary)",
    )
    scope_group.add_argument(
        "--notice-only", action="store_true",
        help="Only update NOTICE-binary (skip LICENSE-binary)",
    )

    parser.add_argument(
        "--output", type=Path, default=None,
        help="Output path for updated LICENSE-binary (default: LICENSE-binary.updated)",
    )
    parser.add_argument(
        "--notice-output", type=Path, default=None,
        help="Output path for updated NOTICE-binary (default: NOTICE-binary.updated)",
    )
    parser.add_argument(
        "--m2-repo", type=Path, default=Path.home() / ".m2" / "repository",
        help="Path to local Maven repository (default: ~/.m2/repository)",
    )
    parser.add_argument(
        "--skip-maven", type=Path, default=None,
        help="Skip Maven invocation; read deps from this file instead "
             "(format: one group:artifact:type:version:scope per line)",
    )
    args = parser.parse_args()

    want_license = not args.notice_only
    want_notice = not args.license_only

    # Locate the repository root: explicit flag, then the script's parent
    # directory, then the current working directory.
    if args.pinot_root:
        pinot_root = args.pinot_root.resolve()
    else:
        repo_guess = Path(__file__).resolve().parent.parent
        if (repo_guess / "LICENSE-binary").exists():
            pinot_root = repo_guess
        elif (Path.cwd() / "LICENSE-binary").exists():
            pinot_root = Path.cwd()
        else:
            print("ERROR: Cannot detect pinot root. Use --pinot-root.", file=sys.stderr)
            sys.exit(1)

    license_file = pinot_root / "LICENSE-binary"
    if not license_file.exists():
        print(f"ERROR: {license_file} not found", file=sys.stderr)
        sys.exit(1)

    if want_license and want_notice:
        scope_label = "LICENSE-binary + NOTICE-binary"
    elif want_license:
        scope_label = "LICENSE-binary only"
    else:
        scope_label = "NOTICE-binary only"
    print(f"Pinot root: {pinot_root}")
    print("Updating: " + scope_label)
    print()

    # Step 1: Parse assembly to find shipped modules
    modules = parse_assembly_modules(pinot_root)

    # Step 2-3: Get new deps from Maven (or from a pre-generated file)
    if not args.skip_maven:
        new_deps = run_dependency_list(pinot_root, modules)
    else:
        print(f"Reading deps from {args.skip_maven}...")
        # Number of ':'-separated fields -> index of the version field:
        #   5 fields: group:artifact:type:version:scope
        #   6 fields: group:artifact:type:classifier:version:scope
        version_index = {5: 3, 6: 4}
        new_deps = set()
        with open(args.skip_maven) as dep_file:
            for raw_line in dep_file:
                entry = raw_line.strip()
                if not entry or entry.startswith("#"):
                    continue
                fields = entry.split(":")
                v_idx = version_index.get(len(fields))
                if v_idx is None:
                    # Lines with any other field count are ignored.
                    continue
                new_deps.add(
                    Dep(fields[0].strip(), fields[1].strip(), fields[v_idx].strip())
                )
        print(f"Read {len(new_deps)} deps from file\n")

    # LICENSE-binary update
    if want_license:
        # Step 4: Parse current LICENSE-binary
        print("Parsing current LICENSE-binary...")
        _file_lines, sections, dep_to_section = parse_license_binary(license_file)
        total_old = sum(len(section.deps) for section in sections)
        print(f" Found {len(sections)} license sections with {total_old} total deps\n")
        for section in sections:
            print(f" {section.name}: {len(section.deps)} deps")
        print()

        # Step 5: Compute diff
        added, removed, changed = compute_diff(sections, new_deps)

        # Step 6: Detect licenses for new deps
        print("Detecting licenses for new dependencies...")
        license_map: Dict[str, str] = {}
        detected = 0
        for new_dep in added:
            found_license = detect_license_from_pom(new_dep, args.m2_repo)
            if not found_license:
                continue
            license_map[str(new_dep)] = found_license
            detected += 1
        print(f" Auto-detected licenses for {detected}/{len(added)} new deps")

        # Step 6a: Check if version-bumped deps changed their license
        print("Checking version-bumped deps for license changes...")
        changed, license_changed = check_version_bump_license_changes(
            changed, dep_to_section, args.m2_repo
        )
        if not license_changed:
            print(f" All {len(changed)} version bumps have unchanged licenses")
        else:
            print(f" WARNING: {len(license_changed)} deps changed license on version bump!")

        # Step 6b: Detect orphaned license files
        print("Checking for orphaned license files...")
        empty_sections, orphaned_files = find_orphaned_license_files(
            pinot_root, sections, removed, added, changed, license_changed, license_map,
        )
        if empty_sections:
            print(f" {len(empty_sections)} sections will be empty after update")
        if orphaned_files:
            print(f" {len(orphaned_files)} license files not referenced in LICENSE-binary")
        print()

        # Step 7: Print report
        print_report(added, removed, changed, license_changed, license_map,
                     empty_sections, orphaned_files)

        # Step 8: Generate updated file
        if args.report_only:
            print("(--report-only mode: no LICENSE-binary file written)")
        else:
            output_path = args.output or (pinot_root / "LICENSE-binary.updated")
            generate_updated_license_binary(
                license_file, sections, added, removed, changed,
                license_changed, license_map, output_path,
            )
            print("\nReview the updated file, then:")
            print(f" diff {license_file} {output_path}")
            print(f" cp {output_path} {license_file}")

    # NOTICE-binary update
    if not want_notice:
        return
    if args.report_only:
        print("(--report-only mode: no NOTICE-binary file written)")
        return

    current_notice = pinot_root / "NOTICE-binary"
    notice_output = args.notice_output or (pinot_root / "NOTICE-binary.updated")
    generate_notice_binary(
        new_deps, args.m2_repo, notice_output,
        existing_notice_file=current_notice,
    )
    print("\n Review the updated NOTICE file, then:")
    print(f" diff {current_notice} {notice_output}")
    print(f" cp {notice_output} {current_notice}")
| |
| |
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()