| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import argparse |
| import json |
| import os |
| import re |
| import subprocess |
| from typing import List |
| from urllib.request import Request, urlopen |
| |
# Map each CI group to the regexes that, when matched against a changed file
# path, trigger that group's checks. Patterns are applied with re.match, i.e.
# they are anchored at the start of the path.
PATTERNS = {
    "python": [
        r"^\.github/workflows/.*python",
        r"^tests/",
        r"^superset/",
        r"^scripts/",
        r"^setup\.py",
        r"^requirements/.+\.txt",
        # Escape the leading dot: "^.pylintrc" would also match "xpylintrc".
        r"^\.pylintrc",
    ],
    "frontend": [
        r"^\.github/workflows/.*(bashlib|frontend|e2e)",
        r"^superset-frontend/",
    ],
    "docker": [
        r"^Dockerfile$",
        r"^docker.*",
    ],
    "docs": [
        r"^docs/",
    ],
    "superset-extensions-cli": [
        r"^\.github/workflows/superset-extensions-cli\.yml",
        r"^superset-extensions-cli/",
        r"^superset-core/",
    ],
}
# Token for authenticated GitHub API requests; injected by the workflow env.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
| |
| |
def fetch_files_github_api(url: str):  # type: ignore
    """Fetch *url* from the GitHub API and return the decoded JSON payload.

    The Authorization header is only attached when GITHUB_TOKEN is set:
    sending "Bearer None" makes GitHub reject even requests that would
    succeed anonymously (rate-limited) without the header.
    """
    req = Request(url)  # noqa: S310
    if GITHUB_TOKEN:
        req.add_header("Authorization", f"Bearer {GITHUB_TOKEN}")
    req.add_header("Accept", "application/vnd.github.v3+json")

    print(f"Fetching from {url}")
    with urlopen(req) as response:  # noqa: S310
        body = response.read()
        return json.loads(body)
| |
| |
def fetch_changed_files_pr(repo: str, pr_number: str) -> List[str]:
    """Return the paths of files changed in the given pull request.

    NOTE: limited to 100 files — ideally this would page through, but instead
    we assume all checks should trigger when 100 files have been touched.
    """
    endpoint = (
        f"https://api.github.com/repos/{repo}/pulls/{pr_number}/files?per_page=100"
    )
    return [entry["filename"] for entry in fetch_files_github_api(endpoint)]
| |
| |
def fetch_changed_files_push(repo: str, sha: str) -> List[str]:
    """Return the paths of files changed in the last commit of a push event.

    Looks up the commit's parent via the API, then compares parent...sha.
    """
    api_base = f"https://api.github.com/repos/{repo}"
    # The commit detail response carries the parent SHA(s) we diff against.
    commit_data = fetch_files_github_api(f"{api_base}/commits/{sha}")
    parents = commit_data.get("parents") or []
    if not parents:
        raise RuntimeError("No parent commit found for comparison.")
    # Diff the current commit against its first parent.
    comparison_data = fetch_files_github_api(
        f"{api_base}/compare/{parents[0]['sha']}...{sha}"
    )
    return [changed["filename"] for changed in comparison_data["files"]]
| |
| |
def detect_changes(files: List[str], check_patterns: List) -> bool:  # type: ignore
    """Return True if any file path matches any of the given patterns.

    :param files: changed file paths to inspect
    :param check_patterns: regex patterns (compiled or raw strings — re.match
        accepts both), anchored at the start of the path
    """
    # any() short-circuits on the first match, same as the original
    # early-return nested loops.
    return any(
        re.match(pattern, file) for file in files for pattern in check_patterns
    )
| |
| |
def print_files(files: List[str]) -> None:
    """Print the file names as a dashed bullet list, one per line."""
    # Single print call: an empty list still emits one newline, matching
    # print("".join([])).
    print(*[f"- {name}" for name in files], sep="\n")
| |
| |
def is_int(s: str) -> bool:
    """Return True if *s* is a (possibly negative) base-10 integer literal.

    Uses re.fullmatch rather than re.match with ``$``: ``$`` tolerates a
    trailing newline, so the old pattern accepted values like ``"5\\n"``.
    """
    return re.fullmatch(r"-?\d+", s) is not None
| |
| |
def main(event_type: str, sha: str, repo: str) -> None:
    """Determine changed files for the triggering event and append a
    ``<group>=true`` line to the GitHub Actions output file for every
    pattern group that was touched.

    :param event_type: GitHub event name (pull_request, push, workflow_dispatch)
    :param sha: commit SHA used for push-event comparison
    :param repo: repository in ``owner/repo`` form
    :raises ValueError: on an unsupported event type
    """
    print("SHA:", sha)
    print("EVENT_TYPE", event_type)
    # files == None means "treat everything as changed"; [] means no matches.
    files = []
    if event_type == "pull_request":
        # GITHUB_REF for PRs looks like "refs/pull/<number>/merge".
        pr_number = os.getenv("GITHUB_REF", "").split("/")[-2]
        if is_int(pr_number):
            files = fetch_changed_files_pr(repo, pr_number)
            print("PR files:")
            print_files(files)

    elif event_type == "push":
        files = fetch_changed_files_push(repo, sha)
        print("Files touched since previous commit:")
        print_files(files)

    elif event_type == "workflow_dispatch":
        # BUG FIX: files previously stayed [] here, so despite the message
        # below no group was ever triggered on manual dispatch. None makes
        # every group's check below evaluate to True.
        files = None
        print("Workflow dispatched, assuming all changed")

    else:
        raise ValueError("Unsupported event type")

    changes_detected = {}
    for group, regex_patterns in PATTERNS.items():
        patterns_compiled = [re.compile(p) for p in regex_patterns]
        changes_detected[group] = files is None or detect_changes(
            files, patterns_compiled
        )

    # Output results
    output_path = os.getenv("GITHUB_OUTPUT") or "/tmp/GITHUB_OUTPUT.txt"  # noqa: S108
    with open(output_path, "a") as f:
        for check, changed in changes_detected.items():
            # NOTE: the API calls above are capped at 100 files, so if we are
            # at (or near) that cap assume all checks should trigger. Using
            # >= 99 because off-by-one errors are not uncommon.
            if changed or (files is not None and len(files) >= 99):
                print(f"{check}=true", file=f)
                print(f"Triggering group: {check}")
| |
| |
def get_git_sha() -> str:
    """Return the commit SHA from GITHUB_SHA, falling back to git HEAD."""
    env_sha = os.getenv("GITHUB_SHA")
    if env_sha:
        return env_sha
    raw = subprocess.check_output(  # noqa: S603
        ["git", "rev-parse", "HEAD"]  # noqa: S607
    )
    return raw.strip().decode("utf-8")
| |
| |
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Detect file changes based on event context"
    )
    parser.add_argument(
        "--event-type",
        default=os.getenv("GITHUB_EVENT_NAME") or "push",
        help="The type of event that triggered the workflow",
    )
    parser.add_argument(
        "--sha",
        default=None,
        help="The commit SHA for push events or PR head SHA "
        "(defaults to GITHUB_SHA or `git rev-parse HEAD`)",
    )
    parser.add_argument(
        "--repo",
        default=os.getenv("GITHUB_REPOSITORY") or "apache/superset",
        help="GitHub repository in the format owner/repo",
    )
    args = parser.parse_args()

    # Resolve the SHA lazily: the old eager default spawned a git subprocess
    # (and failed outside a git checkout with GITHUB_SHA unset) even when
    # --sha was explicitly provided.
    main(args.event_type, args.sha or get_git_sha(), args.repo)