| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| from __future__ import print_function |
| |
| import functools |
| import os |
| import pprint |
| import re |
| import sys |
| import subprocess |
| |
| |
# print() variant that writes to stderr: stdout is reserved for the text
# this script emits for evaluation by the calling shell.
perr = functools.partial(print, file=sys.stderr)
| |
| |
def dump_env_vars(prefix, pattern=None):
    """
    Print to stderr, sorted by name, the environment variables whose name
    starts with *prefix*.

    If *pattern* is given, only names in which that regular expression is
    found (``re.search``) are printed.
    """
    for var_name in sorted(os.environ):
        if not var_name.startswith(prefix):
            continue
        if pattern is not None and not re.search(pattern, var_name):
            continue
        perr("- {0}: {1!r}".format(var_name, os.environ[var_name]))
| |
| |
def run_cmd(cmdline):
    """
    Run *cmdline* (an argument list) and return its standard output as bytes.

    Both stdout and stderr of the child process are captured.

    Raises RuntimeError if the command exits with a non-zero status; the
    message contains the command line, the exit code and the captured
    stderr.
    """
    proc = subprocess.Popen(
        cmdline, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(
            "Command {cmdline} failed with code {returncode}, "
            "stderr was:\n{stderr}\n".format(
                cmdline=cmdline,
                returncode=proc.returncode,
                # Decode leniently: undecodable bytes in stderr must not
                # replace the real failure with a UnicodeDecodeError.
                stderr=err.decode("utf-8", "replace"),
            )
        )
    return out
| |
| |
def get_commit_description(commit):
    """
    Return the full message (title + body) of git commit *commit* as text.

    The output of ``git show`` is decoded as UTF-8; bytes that cannot be
    decoded are dropped.
    """
    git_args = ["git", "show", "--no-patch", "--pretty=format:%B", commit]
    raw_message = run_cmd(git_args)
    return raw_message.decode("utf-8", "ignore")
| |
| |
def list_affected_files(commit_range):
    """
    Return the list of paths reported by ``git diff --name-only`` for the
    given commit range. Blank lines are dropped and surrounding whitespace
    is stripped from each path.
    """
    perr("Getting affected files from", repr(commit_range))
    diff_output = run_cmd(["git", "diff", "--name-only", commit_range])
    stripped = (line.strip() for line in diff_output.decode().splitlines())
    return [name for name in stripped if name]
| |
| |
def get_travis_head_commit():
    """
    Return the value of the TRAVIS_COMMIT environment variable.

    Raises KeyError if the variable is not set.
    """
    head_commit = os.environ["TRAVIS_COMMIT"]
    return head_commit
| |
| |
def get_travis_commit_range():
    """
    Return the git revision range ("A..B") to inspect for the current
    Travis build.

    For pull requests the range starts at the merge base of HEAD and the
    freshly fetched TRAVIS_BRANCH; otherwise it is derived from
    TRAVIS_COMMIT_RANGE.
    """
    if os.environ["TRAVIS_EVENT_TYPE"] != "pull_request":
        # Turn the three-dot range given by Travis into a two-dot one. See
        # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122
        return os.environ["TRAVIS_COMMIT_RANGE"].replace("...", "..")

    # TRAVIS_COMMIT_RANGE is too pessimistic for PRs, as it may contain
    # unrelated changes. Instead, fetch the target branch (which points
    # FETCH_HEAD to it) and start the range at its merge base with HEAD.
    target_ref = "+refs/heads/{0}".format(os.environ["TRAVIS_BRANCH"])
    run_cmd(["git", "fetch", "-q", "origin", target_ref])
    merge_base_output = run_cmd(["git", "merge-base", "HEAD", "FETCH_HEAD"])
    return "{0}..HEAD".format(merge_base_output.decode().strip())
| |
| |
def get_travis_commit_description():
    """
    Return the commit message Travis recorded in TRAVIS_COMMIT_MESSAGE.

    Raises KeyError if the variable is not set.
    """
    # Reading the variable is preferred over
    # get_commit_description(get_travis_head_commit()): rebasing or other
    # repository events may make TRAVIS_COMMIT invalid by the time it is
    # inspected.
    commit_message = os.environ["TRAVIS_COMMIT_MESSAGE"]
    return commit_message
| |
| |
def list_travis_affected_files():
    """
    Return a list of files affected in the current Travis build.

    If the primary commit range cannot be diffed on a non-PR build, fall
    back to the changes of the head commit alone.
    """
    primary_range = get_travis_commit_range()
    try:
        return list_affected_files(primary_range)
    except RuntimeError:
        # TRAVIS_COMMIT_RANGE can contain invalid revisions when
        # building a branch (not a PR) after rebasing:
        # https://github.com/travis-ci/travis-ci/issues/2668
        is_pull_request = os.environ["TRAVIS_EVENT_TYPE"] == "pull_request"
        if is_pull_request:
            raise
        # If it's a rebase, it's probably enough to use the last commit only
        fallback_range = "{0}^..".format(get_travis_head_commit())
        return list_affected_files(fallback_range)
| |
| |
def list_appveyor_affected_files():
    """
    Return a list of files affected in the current AppVeyor build.
    This only works for PR builds.
    """
    # Re-fetch PR base branch (e.g. origin/master), pointing FETCH_HEAD to it
    base_ref = "+refs/heads/{0}".format(os.environ["APPVEYOR_REPO_BRANCH"])
    run_cmd(["git", "fetch", "-q", "origin", base_ref])

    # Compute base changeset between FETCH_HEAD (PR base) and HEAD (PR head)
    merge_base_output = run_cmd(["git", "merge-base", "HEAD", "FETCH_HEAD"])
    base_commit = merge_base_output.decode().strip()

    # Compute changed files between base changeset and HEAD
    pr_range = "{0}..HEAD".format(base_commit)
    return list_affected_files(pr_range)
| |
| |
def list_github_actions_affected_files():
    """
    Return a list of files affected in the current GitHub Actions build.
    """
    # GitHub Actions checks out `refs/remotes/pull/$PR/merge`, where `HEAD`
    # is the merge commit and `HEAD^` is the commit before it. Diffing
    # `HEAD^..` (i.e. `HEAD^..HEAD`) therefore yields the changes the PR
    # brings on top of the target branch.
    merge_commit_range = "HEAD^.."
    return list_affected_files(merge_commit_range)
| |
| |
# Topics tied to one implementation directory (plus the docs).
LANGUAGE_TOPICS = [
    "c_glib",
    "cpp",
    "docs",
    "go",
    "java",
    "js",
    "python",
    "r",
    "ruby",
    "rust",
    "csharp",
]

# Every topic reported by get_affected_topics().
ALL_TOPICS = LANGUAGE_TOPICS + ["integration", "dev"]


# Maps a top-level path component to the topics that must also be treated
# as affected when something under that component changes. The mapping is
# followed transitively (e.g. "c_glib" affects "ruby", so "cpp" does too).
AFFECTED_DEPENDENCIES = {
    "java": ["integration", "python"],
    "js": ["integration"],
    "ci": ALL_TOPICS,
    "cpp": ["python", "c_glib", "r", "ruby", "integration"],
    "format": LANGUAGE_TOPICS,
    "go": ["integration"],
    ".travis.yml": ALL_TOPICS,
    "appveyor.yml": ALL_TOPICS,
    # In theory, it should ignore CONTRIBUTING.md and ISSUE_TEMPLATE.md, but in
    # practice it's going to be CI
    ".github": ALL_TOPICS,
    "c_glib": ["ruby"],
}

# Top-level path components that are themselves topics.
COMPONENTS = {
    "cpp",
    "java",
    "c_glib",
    "r",
    "ruby",
    "integration",
    "js",
    "rust",
    "csharp",
    "go",
    "docs",
    "python",
    "dev",
}


def _affect_dependencies(component, affected):
    """
    Mark as affected every topic reachable from *component* through
    AFFECTED_DEPENDENCIES. *affected* is updated in place.
    """
    # Components are recorded as seen *before* their dependencies are
    # expanded, so a circular dependency terminates instead of recursing
    # without bound.
    seen = {component}
    pending = [component]
    while pending:
        current = pending.pop()
        for topic in AFFECTED_DEPENDENCIES.get(current, ()):
            affected[topic] = True
            if topic not in seen:
                seen.add(topic)
                pending.append(topic)


def get_affected_topics(affected_files):
    """
    Return a dict of topics affected by the given files.
    Each dict value is True if affected, False otherwise.

    Only the first component of each path decides which topic the file
    belongs to. Files whose last component starts with "README" are
    ignored, and so are empty paths. A leading filesystem root (as in
    "/cpp/foo") is skipped when looking for the first component.
    """
    affected = dict.fromkeys(ALL_TOPICS, False)

    for path in affected_files:
        parts = []
        head = path
        while head:
            parent, tail = os.path.split(head)
            if parent == head:
                # Reached a filesystem root such as "/": os.path.split()
                # cannot shorten it any further, so stop here rather than
                # loop forever.
                break
            parts.append(tail)
            head = parent
        if not parts:
            # Empty path (or a bare root): nothing to classify
            continue
        parts.reverse()

        top_level = parts[0]
        file_name = parts[-1]
        if file_name.startswith("README"):
            continue

        if top_level in COMPONENTS:
            affected[top_level] = True

        _affect_dependencies(top_level, affected)

    return affected
| |
| |
def make_env_for_topics(affected):
    """
    Convert a topic -> bool mapping into a dict of environment variables.

    Each topic yields ARROW_CI_<TOPIC>_AFFECTED (topic upper-cased) set to
    "1" if the topic's value is truthy, "0" otherwise.
    """
    env = {}
    for topic, is_affected in affected.items():
        var_name = "ARROW_CI_{0}_AFFECTED".format(topic.upper())
        env[var_name] = "1" if is_affected else "0"
    return env
| |
| |
def get_unix_shell_eval(env):
    """
    Return a string that a Unix shell can eval to export the variables
    in *env*: one single-quoted ``export`` statement per variable, joined
    with "; ".
    """
    export_statements = [
        "export {0}='{1}'".format(name, value) for name, value in env.items()
    ]
    return "; ".join(export_statements)
| |
| |
def get_windows_shell_eval(env):
    """
    Return a string of Windows shell ``set`` commands, one per line, that
    define the variables in *env*.
    """
    set_commands = [
        'set "{0}={1}"'.format(name, value) for name, value in env.items()
    ]
    return "\n".join(set_commands)
| |
| |
def run_from_travis():
    """
    Determine the affected topics for the current Travis-CI build and
    return a Unix shell snippet exporting the matching variables.

    Diagnostics are printed to stderr.
    """
    perr("Environment variables (excerpt):")
    dump_env_vars("TRAVIS_", "(BRANCH|COMMIT|PULL)")

    official_master_build = (
        os.environ["TRAVIS_REPO_SLUG"] == "apache/arrow"
        and os.environ["TRAVIS_BRANCH"] == "master"
        and os.environ["TRAVIS_EVENT_TYPE"] != "pull_request"
    )
    if official_master_build:
        # Never skip anything on master builds in the official repository
        affected = dict.fromkeys(ALL_TOPICS, True)
    else:
        message = get_travis_commit_description()
        # Commit-message markers override change detection:
        # False = skip everything, True = test everything, None = detect
        if "[skip travis]" in message:
            override = False
        elif "[force ci]" in message or "[force travis]" in message:
            override = True
        else:
            override = None

        if override is None:
            # Test affected topics
            changed_files = list_travis_affected_files()
            perr("Affected files:", changed_files)
            affected = get_affected_topics(changed_files)
            assert set(affected) <= set(ALL_TOPICS), affected
        else:
            affected = dict.fromkeys(ALL_TOPICS, override)

    perr("Affected topics:")
    perr(pprint.pformat(affected))
    return get_unix_shell_eval(make_env_for_topics(affected))
| |
| |
def run_from_appveyor():
    """
    Determine the affected topics for the current AppVeyor build and
    return the Windows shell commands setting the matching variables.

    Diagnostics are printed to stderr.
    """
    perr("Environment variables (excerpt):")
    dump_env_vars("APPVEYOR_", "(PULL|REPO)")

    if os.environ.get("APPVEYOR_PULL_REQUEST_HEAD_COMMIT"):
        # PR build: only test what the PR touches
        changed_files = list_appveyor_affected_files()
        perr("Affected files:", changed_files)
        affected = get_affected_topics(changed_files)
        assert set(affected) <= set(ALL_TOPICS), affected
    else:
        # Not a PR build, test everything
        affected = dict.fromkeys(ALL_TOPICS, True)

    perr("Affected topics:")
    perr(pprint.pformat(affected))
    env = make_env_for_topics(affected)
    return get_windows_shell_eval(env)
| |
| |
def run_from_github():
    """
    Determine the affected topics for the current GitHub Actions build and
    return a Unix shell snippet exporting the matching variables.

    Diagnostics are printed to stderr.
    """
    perr("Environment variables (excerpt):")
    dump_env_vars(
        "GITHUB_", "(REPOSITORY|ACTOR|SHA|REF|HEAD_REF|BASE_REF|EVENT_NAME)"
    )

    if os.environ["GITHUB_EVENT_NAME"] == "pull_request":
        # PR build: only test what the PR touches
        changed_files = list_github_actions_affected_files()
        perr("Affected files:", changed_files)
        affected = get_affected_topics(changed_files)
        assert set(affected) <= set(ALL_TOPICS), affected
    else:
        # Not a PR build, test everything
        affected = dict.fromkeys(ALL_TOPICS, True)

    perr("Affected topics:")
    perr(pprint.pformat(affected))
    env = make_env_for_topics(affected)
    return get_unix_shell_eval(env)
| |
| |
def test_get_affected_topics():
    """
    Check get_affected_topics() on a C++ change, a format change and a
    GitHub workflow change.
    """
    topic_names = (
        "c_glib",
        "cpp",
        "docs",
        "go",
        "java",
        "js",
        "python",
        "r",
        "ruby",
        "rust",
        "csharp",
        "integration",
        "dev",
    )

    def expected(affected_names):
        # Full topic dict with True exactly for the given names
        return {name: name in affected_names for name in topic_names}

    # A C++ change reaches its bindings and the integration tests
    assert get_affected_topics(["cpp/CMakeLists.txt"]) == expected(
        {"c_glib", "cpp", "python", "r", "ruby", "integration"}
    )

    # A format change reaches everything except "dev"
    assert get_affected_topics(["format/Schema.fbs"]) == expected(
        set(topic_names) - {"dev"}
    )

    # A change under .github reaches every topic
    assert get_affected_topics([".github/workflows"]) == expected(
        set(topic_names)
    )
| |
| |
if __name__ == "__main__":
    # This script should have its output evaluated by a shell,
    # e.g. "eval `python ci/detect-changes.py`"
    if os.environ.get("TRAVIS"):
        runner = run_from_travis
    elif os.environ.get("APPVEYOR"):
        runner = run_from_appveyor
    elif os.environ.get("GITHUB_WORKFLOW"):
        runner = run_from_github
    else:
        sys.exit(
            "Script must be run under Travis-CI, AppVeyor or GitHub Actions"
        )

    try:
        print(runner())
    except Exception:
        # Make sure the enclosing eval will return an error
        print("exit 1")
        raise