blob: 40b5ac4cfe35845df5a4cec3fc9ea578d86f3a93 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import subprocess
import platform
import urllib.request as ulib
import os
import logging
import importlib
# Constants
PYARROW_VERSION = "15.0.0"
PROJECT_ROOT_DIR = os.path.normpath(
os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../")
)
# Configure logging
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
def get_bazel_version():
"""Get the bazel version from the .bazelversion file."""
with open(os.path.join(PROJECT_ROOT_DIR, ".bazelversion")) as f:
return f.read().strip()
def exec_cmd(cmd: str):
"""Execute a shell command and return its output."""
logging.info(f"running command: {cmd}")
try:
result = subprocess.check_output(cmd, shell=True, universal_newlines=True)
except subprocess.CalledProcessError as error:
logging.error(error.stdout)
raise
logging.info(f"command result: {result}")
return result
def get_os_name_lower():
"""Get the lowercase name of the operating system."""
return platform.system().lower()
def is_windows():
"""Check if the operating system is Windows."""
return get_os_name_lower() == "windows"
def get_os_machine():
"""Get the normalized machine architecture."""
machine = platform.machine().lower()
# Normalize architecture names
if machine in ["x86_64", "amd64"]:
return "x86_64"
elif machine in ["aarch64", "arm64"]:
return "arm64"
return machine
def get_bazel_download_url():
"""Construct the URL to download bazel."""
bazel_version = get_bazel_version()
download_url_base = (
f"https://github.com/bazelbuild/bazel/releases/download/{bazel_version}"
)
# For Windows, use the .exe installer
if is_windows():
return f"{download_url_base}/bazel-{bazel_version}-windows-x86_64.exe"
# For Unix-like systems, use the binary directly (not the installer)
return f"{download_url_base}/bazel-{bazel_version}-{get_os_name_lower()}-{get_os_machine()}"
def urlretrieve_with_retries(url, filename, max_attempts=5, initial_delay=2):
"""Download a URL to filename with retries and exponential backoff.
Raises the last exception if all attempts fail.
"""
attempt = 1
delay = initial_delay
last_exc = None
while attempt <= max_attempts:
try:
logging.info(f"Downloading (attempt {attempt}) {url} -> {filename}")
ulib.urlretrieve(url, filename)
return
except Exception as e:
logging.error(f"Download attempt {attempt} failed: {e}")
last_exc = e
if attempt == max_attempts:
break
logging.info(f"Retrying in {delay} seconds...")
import time
time.sleep(delay)
delay *= 2
attempt += 1
logging.error(f"All {max_attempts} download attempts failed for URL: {url}")
raise last_exc
def cd_project_subdir(subdir):
"""Change to a subdirectory of the project."""
os.chdir(os.path.join(PROJECT_ROOT_DIR, subdir))
def bazel(cmd: str):
"""Execute a bazel command from the project root directory."""
# Ensure we're in the project root directory where MODULE.bazel is located
original_dir = os.getcwd()
os.chdir(PROJECT_ROOT_DIR)
try:
bazel_cmd = "bazel" if is_windows() else "~/bin/bazel"
return exec_cmd(f"{bazel_cmd} {cmd}")
finally:
os.chdir(original_dir)
def update_shell_profile():
"""Update shell profile to include bazel in PATH."""
home = os.path.expanduser("~")
profiles = [".bashrc", ".bash_profile", ".zshrc"]
path_export = 'export PATH="$PATH:$HOME/bin" # Add Bazel to PATH\n'
for profile in profiles:
profile_path = os.path.join(home, profile)
if os.path.exists(profile_path):
with open(profile_path, "a") as f:
f.write(path_export)
logging.info(f"Updated {profile} to include Bazel PATH.")
break
else:
logging.info("No shell profile found. Please add Bazel to PATH manually.")
def install_bazel():
"""Download and install bazel."""
required_version = get_bazel_version()
# Check if bazel is already cached (from GitHub Actions cache)
if not is_windows():
home_bin = os.path.expanduser("~/bin")
bazel_path = os.path.join(home_bin, "bazel")
# Also check ~/.local/bin for some systems
alt_bin = os.path.expanduser("~/.local/bin")
alt_bazel_path = os.path.join(alt_bin, "bazel")
for path in [bazel_path, alt_bazel_path]:
if os.path.exists(path) and os.access(path, os.X_OK):
logging.info(f"Bazel already exists at {path}, verifying...")
try:
# Verify it works and has the correct version
result = exec_cmd(f"{path} --version")
installed_version = result.strip().replace("bazel ", "")
if installed_version == required_version:
logging.info(f"Cached Bazel binary is valid: {result.strip()}")
logging.info("Skipping Bazel download, using cached binary")
return
else:
logging.warning(
f"Cached Bazel version {installed_version} does not match "
f"required version {required_version}"
)
logging.info("Re-downloading Bazel with correct version...")
try:
os.remove(path)
except Exception:
pass
except Exception as e:
logging.warning(f"Cached Bazel binary at {path} is invalid: {e}")
logging.info("Re-downloading Bazel...")
try:
os.remove(path)
except Exception:
pass
bazel_download_url = get_bazel_download_url()
logging.info(f"Downloading bazel from: {bazel_download_url}")
if is_windows():
# For Windows, download the installer and add it to PATH
local_name = "bazel.exe"
try:
urlretrieve_with_retries(bazel_download_url, local_name)
except Exception as e:
logging.error(f"Failed to download bazel: {e}")
logging.error(f"URL: {bazel_download_url}")
logging.error(
f"OS: {get_os_name_lower()}, Machine: {get_os_machine()}, Original Machine: {platform.machine().lower()}"
)
raise
os.chmod(local_name, 0o777)
bazel_path = os.path.join(os.getcwd(), local_name)
exec_cmd(f'setx path "%PATH%;{bazel_path}"')
else:
# For Unix-like systems, download the binary directly to ~/bin/bazel
home_bin = os.path.expanduser("~/bin")
os.makedirs(home_bin, exist_ok=True)
bazel_path = os.path.join(home_bin, "bazel")
try:
urlretrieve_with_retries(bazel_download_url, bazel_path)
except Exception as e:
logging.error(f"Failed to download bazel: {e}")
logging.error(f"URL: {bazel_download_url}")
logging.error(
f"OS: {get_os_name_lower()}, Machine: {get_os_machine()}, Original Machine: {platform.machine().lower()}"
)
raise
os.chmod(bazel_path, 0o755)
update_shell_profile()
# bazel install status check
bazel("--version")
# default is byte
psutil = importlib.import_module("psutil")
total_mem = psutil.virtual_memory().total
limit_jobs = int(total_mem / 1024 / 1024 / 1024 / 3)
bazelrc_path = os.path.join(PROJECT_ROOT_DIR, ".bazelrc")
with open(bazelrc_path, "a") as file:
file.write(f"\nbuild --jobs={limit_jobs}")
def install_cpp_deps():
"""Install dependencies for C++ development."""
# Check the Python version and install the appropriate pyarrow version
python_version = platform.python_version()
if python_version.startswith("3.13"):
exec_cmd("pip install pyarrow==18.0.0")
exec_cmd("pip install numpy")
else:
exec_cmd(f"pip install pyarrow=={PYARROW_VERSION}")
# Automatically install numpy
exec_cmd("pip install psutil")
install_bazel()