# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#### Functions #### (see the end of this file for the main logic)
env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value)
# Log messages in the style of the configure script
lg <- function(..., .indent = "***") {
cat(.indent, " ", sprintf(...), "\n", sep = "")
}
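# For example (illustrative): lg("Found %s", "cmake") prints "*** Found cmake"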
cleanup <- function(path) {
options(.arrow.cleanup = c(getOption(".arrow.cleanup"), path))
}
# Exit the script after logging with .status=1 instead of throwing an error
exit <- function(..., .status = 1) {
lg(...)
q(save = "no", status = .status)
}
# checks the nightly repo for the latest nightly version X.Y.Z.100<dev>
find_latest_nightly <- function(description_version,
list_uri = "https://nightlies.apache.org/arrow/r/src/contrib/PACKAGES",
hush = quietly) {
if (!startsWith(arrow_repo, "https://nightlies.apache.org/arrow/r")) {
lg("Detected non standard dev repo: %s, not checking latest nightly version.", arrow_repo)
return(description_version)
}
res <- try(
{
# Binaries are only uploaded if all jobs pass, so we can just look at the source versions.
urls <- readLines(list_uri)
versions <- grep("Version:\\s*.*?", urls, value = TRUE)
versions <- sort(package_version(sub("Version:\\s*", "", versions)))
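# e.g. a PACKAGES entry like "Version: 14.0.0.100000123" (illustrative)
# parses to package_version("14.0.0.100000123") here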
major_versions <- versions$major
description_version_major <- as.integer(description_version[1, 1])
matching_major <- major_versions == description_version_major
if (!any(matching_major)) {
lg(
"No nightly binaries were found for version %s: falling back to libarrow build from source",
description_version
)
return(description_version)
}
versions <- versions[matching_major]
max(versions)
},
silent = hush
)
if (inherits(res, "try-error")) {
lg("Failed to find latest nightly for %s", description_version)
latest <- description_version
} else {
lg("Latest available nightly for %s: %s", description_version, res)
latest <- res
}
latest
}
try_download <- function(from_url, to_file, hush = quietly) {
if (!download_ok) {
# Don't even try
return(FALSE)
}
# We download some fairly large files, so ensure the timeout is set appropriately.
# This assumes a static library size of 100 MB (generous) and a download speed
# of .3 MB/s (slow). This is to anticipate slower user connections or load on
# artifactory servers.
opts <- options(timeout = max(300, getOption("timeout")))
on.exit(options(opts))
status <- try(
suppressWarnings(
download.file(from_url, to_file, quiet = hush, mode = "wb")
),
silent = hush
)
# Return whether the download was successful
!inherits(status, "try-error") && status == 0
}
validate_checksum <- function(binary_url, libfile, hush = quietly) {
# Explicitly setting the env var to "false" will skip checksum validation
# e.g. in case the included checksums are stale.
skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false")
enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true")
checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums")
# validate binary checksum for CRAN release only
if (!skip_checksum && dir.exists(checksum_path) && is_release ||
enforce_checksum) {
# Munge the path to the correct sha file which we include during the
# release process
checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url)
checksum_file <- file.path(checksum_path, checksum_file)
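# e.g. (illustrative) a binary_url ending in "bin/linux-openssl-3.0/arrow-16.0.0.zip"
# maps to "tools/checksums/linux-openssl-3.0/arrow-16.0.0.zip.sha512"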
# Try `shasum` first; if that doesn't work, fall back to `sha512sum`.
# system2() doesn't raise an R error on failure, so we can't use tryCatch()
# to move from shasum to sha512sum.
# The warnings system2() emits on failure pop up later in the log and are
# more confusing than helpful, so we suppress them.
checksum_ok <- suppressWarnings(system2(
"shasum",
args = c("--status", "-a", "512", "-c", checksum_file),
stdout = ifelse(quietly, FALSE, ""),
stderr = ifelse(quietly, FALSE, "")
)) == 0
if (!checksum_ok) {
checksum_ok <- suppressWarnings(system2(
"sha512sum",
args = c("--status", "-c", checksum_file),
stdout = ifelse(quietly, FALSE, ""),
stderr = ifelse(quietly, FALSE, "")
)) == 0
}
if (checksum_ok) {
lg("Checksum validated successfully for libarrow")
} else {
lg("Checksum validation failed for libarrow")
unlink(libfile)
}
} else {
checksum_ok <- TRUE
}
# Return whether the checksum was successful
checksum_ok
}
download_binary <- function(lib) {
libfile <- paste0("arrow-", VERSION, ".zip")
binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip")
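# e.g. (illustrative) with lib = "linux-openssl-3.0" this is
# "<arrow_repo>bin/linux-openssl-3.0/arrow-<VERSION>.zip"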
if (try_download(binary_url, libfile) && validate_checksum(binary_url, libfile)) {
lg("Successfully retrieved libarrow (%s)", lib)
} else {
# If the download or checksum validation fails, we set libfile to NULL;
# this will normally result in a source build later on.
# TODO: should we condense these messages together and only print them when verbose?
lg(
"Unable to retrieve libarrow for version %s (%s)",
VERSION, lib
)
if (!quietly) {
lg(
"Attempted to download the libarrow binary from: %s",
binary_url
)
}
libfile <- NULL
}
libfile
}
# Function to figure out which flavor of binary we should download, if at all.
# LIBARROW_BINARY controls the behavior. If unset, it will determine a course
# of action based on the current system. Other values you can set it to:
# * "FALSE" (not case-sensitive), to skip this option altogether
# * "TRUE" (not case-sensitive), to try to discover your current OS, or
# * Some other string: a "linux-openssl-${OPENSSL_VERSION}" that corresponds to
# a binary that is available, to override what this function may discover by
# default.
# Possible values are:
# * "linux-openssl-1.0" (OpenSSL 1.0)
# * "linux-openssl-1.1" (OpenSSL 1.1)
# * "linux-openssl-3.0" (OpenSSL 3.0)
# * "macos-amd64-openssl-1.1" (OpenSSL 1.1)
# * "macos-amd64-openssl-3.0" (OpenSSL 3.0)
# * "macos-arm64-openssl-1.1" (OpenSSL 1.1)
# * "macos-arm64-openssl-3.0" (OpenSSL 3.0)
# These string values, along with `NULL`, are the potential return values of
# this function.
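# For example (illustrative), a user could set before installing:
#   LIBARROW_BINARY=linux-openssl-3.0  # force a specific flavor
#   LIBARROW_BINARY=false              # never download a binary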
identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro()) {
if (on_windows) {
return("windows")
}
lib <- tolower(lib)
if (identical(lib, "")) {
# Not specified. Check the allowlist.
lib <- ifelse(check_allowlist(info$id), "true", "false")
}
if (identical(lib, "false")) {
# Do not download a binary
lib <- NULL
} else if (!identical(lib, "true")) {
# The env var provided an os-version to use, overriding our logic.
# We don't validate that this binary exists; if it doesn't, the download will
# fail and the build will fall back to building from source.
} else {
# See if we can find a suitable binary
lib <- select_binary()
}
lib
}
check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apache/arrow/main/r/tools/nixlibs-allowlist.txt") {
allowlist <- tryCatch(
# Try a remote allowlist so that we can add/remove without a release
suppressWarnings(readLines(allowed)),
# Fall back to the default: Ubuntu, CentOS/RHEL, and macOS (darwin) only
error = function(e) c("ubuntu", "centos", "redhat", "rhel", "darwin")
)
# allowlist should contain valid regular expressions (plain strings ok too)
any(grepl(paste(allowlist, collapse = "|"), os))
}
select_binary <- function(os = tolower(Sys.info()[["sysname"]]),
arch = tolower(Sys.info()[["machine"]]),
test_program = test_for_curl_and_openssl) {
if (identical(os, "darwin") || (identical(os, "linux") && identical(arch, "x86_64"))) {
# We only host x86_64 Linux binaries, plus x86_64 and arm64 macOS binaries, today
binary <- tryCatch(
# Somehow the test program system2 call errors on the sanitizer builds
# so globally handle the possibility that this could fail
{
errs <- compile_test_program(test_program)
openssl_version <- determine_binary_from_stderr(errs)
arch <- ifelse(identical(os, "darwin"), paste0("-", arch, "-"), "-")
if (is.null(openssl_version)) {
NULL
} else {
paste0(os, arch, openssl_version)
}
},
error = function(e) {
lg("Unable to find libcurl and openssl")
NULL
}
)
} else {
# No binary available for arch
lg("Building on %s %s", os, arch)
binary <- NULL
}
binary
}
# This tests that curl and OpenSSL are present (because we can include their headers)
# and it checks for other versions/features and raises errors that we grep for
test_for_curl_and_openssl <- "
#ifndef __APPLE__
#include <ciso646>
#ifdef _LIBCPP_VERSION
#error Using libc++
#endif
#endif
#include <curl/curl.h>
#include <openssl/opensslv.h>
#if OPENSSL_VERSION_NUMBER < 0x10002000L
#error OpenSSL version too old
#endif
#if OPENSSL_VERSION_NUMBER < 0x10100000L
#error Using OpenSSL version 1.0
#endif
#if OPENSSL_VERSION_NUMBER >= 0x30000000L
#error Using OpenSSL version 3
#endif
"
compile_test_program <- function(code) {
openssl_dir <- ""
if (on_macos) {
openssl_root_dir <- get_macos_openssl_dir()
openssl_dir <- paste0("-I", openssl_root_dir, "/include")
}
runner <- paste(
R_CMD_config("CXX17"),
openssl_dir,
R_CMD_config("CPPFLAGS"),
R_CMD_config("CXX17FLAGS"),
R_CMD_config("CXX17STD"),
"-E",
"-xc++"
)
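# The resulting shell command is roughly (illustrative):
#   echo "<code>" | <CXX17> -I<openssl>/include <CPPFLAGS> <CXX17FLAGS> <CXX17STD> -E -xc++ -
# i.e. we only run the preprocessor on the test program, reading it from stdin.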
suppressWarnings(system2("echo", sprintf('"%s" | %s -', code, runner), stdout = FALSE, stderr = TRUE))
}
get_macos_openssl_dir <- function() {
openssl_root_dir <- Sys.getenv("OPENSSL_ROOT_DIR", NA)
header <- "openssl/opensslv.h"
if (is.na(openssl_root_dir) || !file.exists(file.path(openssl_root_dir, "include", header))) {
# try to guess default openssl include dir based on CRAN's build script
# https://github.com/R-macos/recipes/blob/master/build.sh#L35
if (identical(Sys.info()["machine"], "arm64") && file.exists(file.path("/opt/R/arm64/include", header))) {
openssl_root_dir <- "/opt/R/arm64"
} else if (identical(Sys.info()["machine"], "x86_64") && file.exists(file.path("/opt/R/x86_64/include", header))) {
openssl_root_dir <- "/opt/R/x86_64"
} else {
openssl_root_dir <- "/usr/local"
}
}
openssl_root_dir
}
# (Linux binaries are built with a newer devtoolset but an older glibc (2.17)
# for broader compatibility, like manylinux2014.)
determine_binary_from_stderr <- function(errs) {
if (is.null(attr(errs, "status"))) {
# There was no error in compiling, so we found libcurl and OpenSSL >= 1.1
# but OpenSSL < 3.0
lg("Found libcurl and OpenSSL >= 1.1")
return("openssl-1.1")
# Else, check for dealbreakers:
} else if (!on_macos && any(grepl("Using libc++", errs, fixed = TRUE))) {
# Our Linux binaries are all built against GNU libstdc++, so they fail with libc++
lg("Linux binaries incompatible with libc++")
return(NULL)
} else if (header_not_found("curl/curl", errs)) {
lg("libcurl not found")
return(NULL)
} else if (header_not_found("openssl/opensslv", errs)) {
lg("OpenSSL not found")
return(NULL)
} else if (any(grepl("OpenSSL version too old", errs))) {
lg("OpenSSL found but version >= 1.0.2 is required for some features")
return(NULL)
# Else, determine which other binary will work
} else if (any(grepl("Using OpenSSL version 1.0", errs))) {
if (on_macos) {
lg("OpenSSL 1.0 is not supported on macOS")
return(NULL)
}
lg("Found libcurl and OpenSSL < 1.1")
return("openssl-1.0")
} else if (any(grepl("Using OpenSSL version 3", errs))) {
lg("Found libcurl and OpenSSL >= 3.0.0")
return("openssl-3.0")
}
NULL
}
header_not_found <- function(header, errs) {
regex <- sprintf("[Ee]rror.*%s\\.h", header)
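# e.g. this matches compiler output like
# "fatal error: curl/curl.h: No such file or directory"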
any(grepl(regex, errs))
}
#### start distro ####
distro <- function() {
# This is not part of distro but needed to enable prebuilt binaries on macos
if (on_macos) {
return(list(id = "darwin", arch = tolower(Sys.info()[["machine"]])))
}
# The code in this script is a (potentially stale) copy of the distro package
if (requireNamespace("distro", quietly = TRUE)) {
# Use the version from the package, which may be updated from this
return(distro::distro())
}
out <- lsb_release()
if (is.null(out)) {
out <- os_release()
if (is.null(out)) {
out <- system_release()
}
}
if (is.null(out)) {
return(NULL)
}
out$id <- tolower(out$id)
# Debian unstable & testing lsb_release `version` doesn't include numbers, but we can map from the codename
if (is.null(out$version) || out$version %in% c("testing", "unstable")) {
if (grepl("bullseye", out$codename)) {
out$short_version <- "11"
} else if (grepl("bookworm", out$codename)) {
out$short_version <- "12"
}
} else if (out$id == "ubuntu") {
# Keep major.minor version
out$short_version <- sub('^"?([0-9]+\\.[0-9]+).*"?.*$', "\\1", out$version)
} else {
# Only major version number
out$short_version <- sub('^"?([0-9]+).*"?.*$', "\\1", out$version)
}
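# e.g. (illustrative) on Ubuntu 22.04 this returns something like
# list(id = "ubuntu", version = "22.04", codename = "jammy", short_version = "22.04")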
out
}
lsb_release <- function() {
if (have_lsb_release()) {
list(
id = call_lsb("-is"),
version = call_lsb("-rs"),
codename = call_lsb("-cs")
)
} else {
NULL
}
}
have_lsb_release <- function() nzchar(Sys.which("lsb_release"))
call_lsb <- function(args) system(paste("lsb_release", args), intern = TRUE)
os_release <- function() {
rel_data <- read_os_release()
if (!is.null(rel_data)) {
vals <- as.list(sub('^.*="?(.*?)"?$', "\\1", rel_data))
names(vals) <- sub("^(.*)=.*$", "\\1", rel_data)
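# e.g. a line 'ID="ubuntu"' yields vals[["ID"]] == "ubuntu"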
out <- list(
id = vals[["ID"]],
version = vals[["VERSION_ID"]]
)
if ("VERSION_CODENAME" %in% names(vals)) {
out$codename <- vals[["VERSION_CODENAME"]]
} else {
# This probably isn't right, maybe could extract codename from pretty name?
out$codename <- vals[["PRETTY_NAME"]]
}
out
} else {
NULL
}
}
read_os_release <- function() {
if (file.exists("/etc/os-release")) {
readLines("/etc/os-release")
}
}
system_release <- function() {
rel_data <- read_system_release()
if (!is.null(rel_data)) {
# Something like "CentOS Linux release 7.7.1908 (Core)"
list(
id = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\1", rel_data),
version = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\2", rel_data),
codename = NA
)
} else {
NULL
}
}
read_system_release <- function() {
if (file.exists("/etc/system-release")) {
readLines("/etc/system-release")[1]
}
}
#### end distro ####
find_local_source <- function() {
# We'll take the first of these that exists
# The first case probably occurs if we're in the arrow git repo
# The second probably occurs if we're installing the arrow R package
cpp_dir_options <- c(
file.path(Sys.getenv("ARROW_SOURCE_HOME", ".."), "cpp"),
"tools/cpp"
)
for (cpp_dir in cpp_dir_options) {
if (file.exists(file.path(cpp_dir, "src/arrow/api.h"))) {
lg("Found local C++ source: '%s'", cpp_dir)
return(cpp_dir)
}
}
NULL
}
env_vars_as_string <- function(env_var_list) {
# Do some basic checks on env_var_list:
# Check that env_var_list has names, that those names are valid POSIX
# environment variables, and that none of the values contain `'`.
stopifnot(
length(env_var_list) == length(names(env_var_list)),
all(grepl("^[^0-9]", names(env_var_list))),
all(grepl("^[a-zA-Z0-9_]+$", names(env_var_list))),
!any(grepl("'", env_var_list, fixed = TRUE))
)
env_var_string <- paste0(names(env_var_list), "='", env_var_list, "'", collapse = " ")
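# e.g. (illustrative) c(CMAKE = "/usr/bin/cmake", N_JOBS = "2") becomes
# "CMAKE='/usr/bin/cmake' N_JOBS='2'"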
if (nchar(env_var_string) > 30000) {
# This could happen if the full paths in *_SOURCE_URL were *very* long.
# A more formal check would look at getconf ARG_MAX, but this shouldn't matter
lg("Warning: Environment variables are very long. This could cause issues on some shells.")
}
env_var_string
}
R_CMD_config <- function(var) {
tools::Rcmd(paste("config", var), stdout = TRUE)
}
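# e.g. R_CMD_config("CC") runs `R CMD config CC` and returns the compiler
# command R was configured with (e.g. "gcc")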
build_libarrow <- function(src_dir, dst_dir) {
# We'll need to compile R bindings with these libs, so delete any .o files
system("rm src/*.o", ignore.stdout = TRUE, ignore.stderr = TRUE)
# Set up make for parallel building
# CRAN policy says not to use more than 2 cores during checks
# If you have more and want to use more, set MAKEFLAGS or NOT_CRAN
ncores <- parallel::detectCores()
if (!not_cran) {
ncores <- min(ncores, 2)
}
makeflags <- Sys.getenv("MAKEFLAGS")
if (makeflags == "") {
makeflags <- sprintf("-j%s", ncores)
Sys.setenv(MAKEFLAGS = makeflags)
}
if (!quietly) {
lg("Building with MAKEFLAGS=%s", makeflags)
}
# Check for libarrow build dependencies:
# * cmake
cmake <- ensure_cmake()
# Optionally build somewhere not in tmp so we can dissect the build if it fails
debug_dir <- Sys.getenv("LIBARROW_DEBUG_DIR")
if (nzchar(debug_dir)) {
build_dir <- debug_dir
} else {
# But normally we'll just build in a tmp dir
build_dir <- tempfile()
}
cleanup(build_dir)
env_var_list <- c(
SOURCE_DIR = src_dir,
BUILD_DIR = build_dir,
DEST_DIR = dst_dir,
CMAKE = cmake,
# EXTRA_CMAKE_FLAGS will often be "", but it's convenient later to have it defined
EXTRA_CMAKE_FLAGS = Sys.getenv("EXTRA_CMAKE_FLAGS"),
# Make sure we build with the same compiler settings that R is using
# Exception: if you've added ccache to CC and CXX following
# http://dirk.eddelbuettel.com/blog/2017/11/27/, some libarrow
# third party dependencies will error on compilation. But don't
# worry, `ARROW_USE_CCACHE=ON` by default, so if ccache
# is found, it will be used by the libarrow build, and this does
# not affect how R compiles the arrow bindings.
CC = sub("^.*ccache", "", R_CMD_config("CC")),
CXX = paste(sub("^.*ccache", "", R_CMD_config("CXX17")), R_CMD_config("CXX17STD")),
# CXXFLAGS = R_CMD_config("CXX17FLAGS"), # We don't want the same debug symbols
LDFLAGS = R_CMD_config("LDFLAGS"),
N_JOBS = ncores
)
dep_source <- Sys.getenv("ARROW_DEPENDENCY_SOURCE")
if (dep_source %in% c("", "AUTO") && !nzchar(Sys.which("pkg-config"))) {
lg("pkg-config not installed, setting ARROW_DEPENDENCY_SOURCE=BUNDLED", .indent = "****")
env_var_list <- c(env_var_list, ARROW_DEPENDENCY_SOURCE = "BUNDLED")
}
# On macOS, if not otherwise set, override Boost_SOURCE and lz4_SOURCE to be bundled
# Necessary due to #39590 for CRAN
if (on_macos) {
# Using lowercase (e.g. Boost_SOURCE) to match the cmake args we use already.
deps_to_bundle <- c("Boost", "lz4")
for (dep_to_bundle in deps_to_bundle) {
env_var <- paste0(dep_to_bundle, "_SOURCE")
if (Sys.getenv(env_var) == "") {
env_var_list <- c(env_var_list, setNames("BUNDLED", env_var))
}
}
}
env_var_list <- with_cloud_support(env_var_list)
# turn_off_all_optional_features() needs to happen after
# with_cloud_support(), since it might turn features ON.
thirdparty_deps_unavailable <- !download_ok &&
!dir.exists(thirdparty_dependency_dir) &&
!env_is("ARROW_DEPENDENCY_SOURCE", "system")
do_minimal_build <- env_is("LIBARROW_MINIMAL", "true")
if (do_minimal_build) {
env_var_list <- turn_off_all_optional_features(env_var_list)
} else if (thirdparty_deps_unavailable) {
cat(paste0(
"*** Building C++ library from source, but downloading thirdparty dependencies\n",
" is not possible, so this build will turn off all thirdparty features.\n",
" See installation guide for details:\n",
" https://arrow.apache.org/docs/r/articles/install.html\n"
))
env_var_list <- turn_off_all_optional_features(env_var_list)
} else if (dir.exists(thirdparty_dependency_dir)) {
# Add the *_SOURCE_URL env vars
env_var_list <- set_thirdparty_urls(env_var_list)
}
env_vars <- env_vars_as_string(env_var_list)
lg("arrow %s", ifelse(quietly, "", paste("with", env_vars)), .indent = "****")
build_log_path <- tempfile(fileext = ".log")
status <- suppressWarnings(system2(
"bash",
"inst/build_arrow_static.sh",
env = env_vars,
stdout = ifelse(quietly, build_log_path, ""),
stderr = ifelse(quietly, build_log_path, "")
))
if (status != 0) {
# It failed :(
lg("Error building Arrow C++.", .indent = "****")
if (quietly) {
cat(
"**** Printing contents of build log because the build failed",
"while ARROW_R_DEV was set to FALSE\n"
)
cat(readLines(build_log_path), sep = "\n")
cat("**** Complete build log may still be present at", build_log_path, "\n")
}
}
invisible(status)
}
ensure_cmake <- function(cmake_minimum_required = "3.16") {
cmake <- find_cmake(version_required = cmake_minimum_required)
if (is.null(cmake)) {
# If not found, download it
CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.26.4")
if (on_macos) {
postfix <- "-macos-universal.tar.gz"
} else if (tolower(Sys.info()[["machine"]]) %in% c("arm64", "aarch64")) {
postfix <- "-linux-aarch64.tar.gz"
} else if (tolower(Sys.info()[["machine"]]) == "x86_64") {
postfix <- "-linux-x86_64.tar.gz"
} else {
exit(paste0(
"*** cmake was not found locally.\n",
" Please make sure cmake >= ", cmake_minimum_required,
" is installed and available on your PATH."
))
}
cmake_binary_url <- paste0(
"https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION,
"/cmake-", CMAKE_VERSION, postfix
)
cmake_tar <- tempfile()
cmake_dir <- tempfile()
download_successful <- try_download(cmake_binary_url, cmake_tar)
if (!download_successful) {
exit(paste0(
"*** cmake was not found locally and download failed.\n",
" Make sure cmake >= ", cmake_minimum_required,
" is installed and available on your PATH,\n",
" or download ", cmake_binary_url, "\n",
" and define the CMAKE environment variable.\n"
))
}
untar(cmake_tar, exdir = cmake_dir)
unlink(cmake_tar)
cleanup(cmake_dir)
# the bin dir is slightly different on macos
if (on_macos) {
bin_dir <- "CMake.app/Contents/bin"
} else {
bin_dir <- "bin"
}
cmake <- paste0(
cmake_dir,
"/cmake-", CMAKE_VERSION, sub(".tar.gz", "", postfix, fixed = TRUE),
"/",
bin_dir,
"/cmake"
)
lg("cmake %s", CMAKE_VERSION, .indent = "****")
}
cmake
}
find_cmake <- function(paths = c(
Sys.getenv("CMAKE"),
Sys.which("cmake"),
# CRAN has it here, not on PATH
if (on_macos) "/Applications/CMake.app/Contents/bin/cmake",
Sys.which("cmake3")
),
version_required = "3.16") {
# Given a list of possible cmake paths, return the first one that exists and is new enough
# version_required should be a string or packageVersion; numeric version
# can be misleading (e.g. 3.10 is actually 3.1)
for (path in paths) {
if (nzchar(path) && file.exists(path)) {
# Sys.which() returns a named vector, but that plays badly with c() later
names(path) <- NULL
found_version <- cmake_version(path)
if (found_version >= version_required) {
# Show which one we found
lg("cmake %s: %s", found_version, path, .indent = "****")
# Stop searching here
return(path)
} else {
# Keep trying
lg("Not using cmake found at %s", path, .indent = "****")
if (found_version > 0) {
lg("Version >= %s required; found %s", version_required, found_version, .indent = "*****")
} else {
# If cmake_version() couldn't determine version, it returns 0
lg("Could not determine version; >= %s required", version_required, .indent = "*****")
}
}
}
}
# If none found, return NULL
NULL
}
cmake_version <- function(cmd = "cmake") {
tryCatch(
{
raw_version <- system(paste(cmd, "--version"), intern = TRUE, ignore.stderr = TRUE)
pat <- ".* ([0-9\\.]+).*?"
which_line <- grep(pat, raw_version)
package_version(sub(pat, "\\1", raw_version[which_line]))
},
error = function(e) {
return(0)
}
)
}
turn_off_all_optional_features <- function(env_var_list) {
# Because these are done as environment variables (as opposed to build flags),
# setting these to "OFF" overrides any previous setting. We don't need to
# check the existing value.
# Some features turn on other features (e.g. substrait -> protobuf),
# So the list of things to turn off is long. See:
# https://github.com/apache/arrow/blob/main/cpp/cmake_modules/ThirdpartyToolchain.cmake#L275
turn_off <- c(
"ARROW_MIMALLOC" = "OFF",
"ARROW_JEMALLOC" = "OFF",
"ARROW_JSON" = "OFF",
"ARROW_PARQUET" = "OFF", # depends on thrift
"ARROW_DATASET" = "OFF", # depends on parquet
"ARROW_S3" = "OFF",
"ARROW_GCS" = "OFF",
"ARROW_WITH_GOOGLE_CLOUD_CPP" = "OFF",
"ARROW_WITH_NLOHMANN_JSON" = "OFF",
"ARROW_SUBSTRAIT" = "OFF",
"ARROW_WITH_PROTOBUF" = "OFF",
"ARROW_WITH_BROTLI" = "OFF",
"ARROW_WITH_BZ2" = "OFF",
"ARROW_WITH_LZ4" = "OFF",
"ARROW_WITH_SNAPPY" = "OFF",
"ARROW_WITH_ZLIB" = "OFF",
"ARROW_WITH_ZSTD" = "OFF",
"ARROW_WITH_RE2" = "OFF",
"ARROW_WITH_UTF8PROC" = "OFF",
# The syntax to turn off XSIMD is different.
# Pull existing value of EXTRA_CMAKE_FLAGS first (must be defined)
"EXTRA_CMAKE_FLAGS" = paste(
env_var_list[["EXTRA_CMAKE_FLAGS"]],
"-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE"
)
)
# Create a new env_var_list, with the values of turn_off set.
# replace() also adds new values if they didn't exist before
replace(env_var_list, names(turn_off), turn_off)
}
get_component_names <- function() {
if (!isTRUE(Sys.which("bash") != "")) {
stop("nixlibs.R requires bash to be installed and available in your PATH")
}
deps_bash <- "tools/download_dependencies_R.sh"
csv_tempfile <- tempfile(fileext = ".csv")
deps_bash_success <- system2("bash", deps_bash, stdout = csv_tempfile) == 0
if (!deps_bash_success) {
stop("Failed to run download_dependencies_R.sh")
}
deps_df <- read.csv(csv_tempfile,
stringsAsFactors = FALSE, row.names = NULL, quote = "'"
)
stopifnot(
names(deps_df) == c("env_varname", "filename"),
nrow(deps_df) > 0
)
deps_df
}
set_thirdparty_urls <- function(env_var_list) {
# This function does *not* check if existing *_SOURCE_URL variables are set.
# The directory tools/thirdparty_dependencies is created by
# create_package_with_all_dependencies() and saved in the tar file.
deps_df <- get_component_names()
dep_dir <- normalizePath(thirdparty_dependency_dir, mustWork = TRUE)
deps_df$full_filenames <- file.path(dep_dir, deps_df$filename)
files_exist <- file.exists(deps_df$full_filenames)
if (!any(files_exist)) {
stop("Dependency tar files did not exist in '", dep_dir, "'")
}
# Only set env var for files that are in thirdparty_dependency_dir
# (allows for a user to download a limited set of tar files, if they wanted)
deps_df <- deps_df[files_exist, ]
env_var_list <- replace(env_var_list, deps_df$env_varname, deps_df$full_filenames)
if (!quietly) {
env_var_list <- replace(env_var_list, "ARROW_VERBOSE_THIRDPARTY_BUILD", "ON")
}
env_var_list
}
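# Whether an optional feature (e.g. ARROW_S3) was requested via its env var.
# An explicit ON/OFF always wins; otherwise we use `default`, which is TRUE
# only when LIBARROW_MINIMAL is explicitly set to "false".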
is_feature_requested <- function(env_varname, default = env_is("LIBARROW_MINIMAL", "false")) {
env_value <- tolower(Sys.getenv(env_varname))
if (identical(env_value, "off")) {
# If e.g. ARROW_MIMALLOC=OFF explicitly, override default
requested <- FALSE
} else if (identical(env_value, "on")) {
requested <- TRUE
} else {
requested <- default
}
requested
}
with_cloud_support <- function(env_var_list) {
arrow_s3 <- is_feature_requested("ARROW_S3")
arrow_gcs <- is_feature_requested("ARROW_GCS")
if (arrow_s3 || arrow_gcs) {
# User wants S3 or GCS support.
# Make sure that we have curl and openssl system libs
feats <- c(
if (arrow_s3) "S3",
if (arrow_gcs) "GCS"
)
start_msg <- paste(feats, collapse = "/")
off_flags <- paste("ARROW_", feats, "=OFF", sep = "", collapse = " and ")
print_warning <- function(msg) {
# Utility to assemble warning message in the console
cat("**** ", start_msg, " support ", msg, "; building with ", off_flags, "\n")
}
if (!cmake_find_package("CURL", NULL, env_var_list)) {
# curl on macos should be installed, so no need to alter this for macos
# TODO: check for apt/yum/etc. and message the right thing?
print_warning("requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb)")
arrow_s3 <- FALSE
arrow_gcs <- FALSE
} else if (!cmake_find_package("OpenSSL", "1.0.2", env_var_list)) {
print_warning("requires version >= 1.0.2 of openssl-devel (rpm), libssl-dev (deb), or openssl (brew)")
arrow_s3 <- FALSE
arrow_gcs <- FALSE
}
}
# Update the build flags
env_var_list <- replace(env_var_list, "ARROW_S3", ifelse(arrow_s3, "ON", "OFF"))
replace(env_var_list, "ARROW_GCS", ifelse(arrow_gcs, "ON", "OFF"))
}
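# e.g. (illustrative) cmake_find_package("OpenSSL", "1.0.2", env_var_list) writes
# 'find_package(OpenSSL 1.0.2 REQUIRED)' to a scratch CMakeLists.txt and returns
# TRUE only if cmake can configure that project successfully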
cmake_find_package <- function(pkg, version = NULL, env_var_list) {
td <- tempfile()
dir.create(td)
cleanup(td)
find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)")
writeLines(find_package, file.path(td, "CMakeLists.txt"))
env_vars <- env_vars_as_string(env_var_list)
cmake_cmd <- paste0(
"export ", env_vars,
" && cd ", td,
" && $CMAKE ",
" -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON",
" -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON",
" ."
)
system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0
}
############### Main logic #############
args <- commandArgs(TRUE)
VERSION <- args[1]
# TESTING is set in test-nixlibs.R; it won't be set when called from configure
test_mode <- exists("TESTING")
# Prevent error with binary selection during testing.
if (test_mode && is.na(VERSION)) {
VERSION <- "8.0.0.9000"
}
VERSION <- package_version(VERSION)
dev_version <- VERSION[1, 4]
# Small dev versions are added for R-only changes during CRAN submission
is_release <- is.na(dev_version) || dev_version < "100"
on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin"
on_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
quietly <- !env_is("ARROW_R_DEV", "true")
not_cran <- env_is("NOT_CRAN", "true")
if (is_release) {
VERSION <- VERSION[1, 1:3]
arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/")
} else {
not_cran <- TRUE
arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/")
}
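# e.g. (illustrative) for a 16.0.0 release, arrow_repo ends up as
# "https://apache.jfrog.io/artifactory/arrow/r/16.0.0/libarrow/"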
if (!is_release && !test_mode) {
VERSION <- find_latest_nightly(VERSION)
}
# To collect dirs to remove on exit, use cleanup() to add them.
# We reset the option here to avoid errors on reruns in the same session.
options(.arrow.cleanup = character())
on.exit(unlink(getOption(".arrow.cleanup"), recursive = TRUE), add = TRUE)
# Enable full-featured builds on macOS, even in the case of CRAN source builds.
if (not_cran || on_macos) {
# Set more eager defaults
if (env_is("LIBARROW_BINARY", "")) {
Sys.setenv(LIBARROW_BINARY = "true")
}
if (env_is("LIBARROW_MINIMAL", "")) {
Sys.setenv(LIBARROW_MINIMAL = "false")
}
}
# The default will build from source as a fallback if a binary is not found or shouldn't be used
# Set LIBARROW_BUILD=FALSE to ensure that we use a previously built libarrow
# and don't fall back to a full source build
build_ok <- !env_is("LIBARROW_BUILD", "false")
# Check if we're authorized to download
download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true")
download_libarrow_ok <- download_ok && !env_is("LIBARROW_DOWNLOAD", "false")
# This "tools/thirdparty_dependencies" path, within the tar file, might exist if
# create_package_with_all_dependencies() was run, or if someone has created it
# manually before running make build.
# If you change this path, you also need to edit
# `create_package_with_all_dependencies()` in install-arrow.R
thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies")
arrow_versioned <- paste0("arrow-", VERSION)
# configure.win uses a different libarrow dir and the zip is already nested
if (on_windows) {
lib_dir <- "windows"
dst_dir <- lib_dir
} else {
lib_dir <- "libarrow"
dst_dir <- file.path(lib_dir, arrow_versioned)
}
api_h <- file.path(lib_dir, arrow_versioned, "include/arrow/api.h")
if (!test_mode && !file.exists(api_h)) {
# If we're working in a local checkout and have already built the libs, we
# don't need to do anything. Otherwise,
# (1) Look for a prebuilt binary for this version
bin_file <- src_dir <- NULL
# Keep backwards compatibility with winlibs.R
bin_zip <- Sys.getenv("ARROW_DOWNLOADED_BINARIES", Sys.getenv("RWINLIB_LOCAL", NA))
if (!is.na(bin_zip)) {
lg("Using pre-downloaded zip for libarrow binaries: %s", bin_zip)
if (file.exists(bin_zip)) {
bin_file <- tempfile()
file.copy(bin_zip, bin_file)
} else {
lg("File not found: %s ($ARROW_DOWNLOADED_BINARIES)", bin_zip)
bin_file <- NULL
}
} else if (download_libarrow_ok) {
binary_flavor <- identify_binary()
if (!is.null(binary_flavor)) {
# The env vars say we can, and we've determined a lib that should work
bin_file <- download_binary(binary_flavor)
}
}
if (!is.null(bin_file)) {
# Extract them
dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE)
unzip(bin_file, exdir = dst_dir)
unlink(bin_file)
} else if (build_ok && !on_windows) {
# (2) Find source and build it
src_dir <- find_local_source()
if (!is.null(src_dir)) {
cat(paste0(
"*** Building libarrow from source\n",
" For build options and troubleshooting, see the install guide:\n",
" https://arrow.apache.org/docs/r/articles/install.html\n"
))
build_libarrow(src_dir, dst_dir)
} else {
exit("Proceeding without libarrow (no local source)")
}
} else {
exit("Proceeding without libarrow (build not authorized)")
}
}