blob: 3d9edf03687bcba2d5ce7ae63996e0dc63a98e96 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
args <- commandArgs(TRUE)
VERSION <- args[1]
dst_dir <- paste0("libarrow/arrow-", VERSION)
# TESTING is set in test-nixlibs.R; it won't be set when called from configure
test_mode <- exists("TESTING")
# Prevent error with binary selection during testing.
if (test_mode && is.na(VERSION)) {
VERSION <- "8.0.0.9000"
}
dev_version <- package_version(VERSION)[1, 4]
# Small dev versions are added for R-only changes during CRAN submission.
if (is.na(dev_version) || dev_version < 100) {
VERSION <- package_version(VERSION)[1, 1:3]
arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/")
} else {
arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/")
}
options(.arrow.cleanup = character()) # To collect dirs to rm on exit
on.exit(unlink(getOption(".arrow.cleanup")))
env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value)
try_download <- function(from_url, to_file, hush = quietly) {
status <- try(
suppressWarnings(
download.file(from_url, to_file, quiet = hush)
),
silent = hush
)
# Return whether the download was successful
!inherits(status, "try-error") && status == 0
}
# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
quietly <- !env_is("ARROW_R_DEV", "true")
# The default will build from source as a fallback if a binary is not found or shouldn't be used
# Set LIBARROW_BUILD=FALSE to ensure that we use a previously built libarrow
# and don't fall back to a full source build
build_ok <- !env_is("LIBARROW_BUILD", "false")
# Check if we're authorized to download (not asked an offline build).
# (Note that cmake will still be downloaded if necessary
# https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds)
download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true")
# This "tools/thirdparty_dependencies" path, within the tar file, might exist if
# create_package_with_all_dependencies() was run, or if someone has created it
# manually before running make build.
# If you change this path, you also need to edit
# `create_package_with_all_dependencies()` in install-arrow.R
thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies")
download_binary <- function(lib) {
libfile <- tempfile()
binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip")
if (try_download(binary_url, libfile)) {
if (!quietly) {
cat(sprintf("*** Successfully retrieved C++ binaries (%s)\n", lib))
}
} else {
if (!quietly) {
cat(sprintf("*** Downloading libarrow binary failed for version %s (%s)\n at %s\n",
VERSION, lib, binary_url))
}
libfile <- NULL
}
libfile
}
# Function to figure out which flavor of binary we should download, if at all.
# LIBARROW_BINARY controls the behavior. If unset, it will determine a course
# of action based on the current system. Other values you can set it to:
# * "FALSE" (not case-sensitive), to skip this option altogether
# * "TRUE" (not case-sensitive), to try to discover your current OS, or
# * Some other string: a "distro-version" that corresponds to a binary that is
# available, to override what this function may discover by default.
# Possible values are:
# * "centos-7" (gcc 8 (devtoolset), openssl 1, glib 2.17)
# * "ubuntu-18.04" (gcc 8, openssl 1, glib 2.27)
# * "ubuntu-22.04" (openssl 3)
# These string values, along with `NULL`, are the potential return values of
# this function.
identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro()) {
lib <- tolower(lib)
if (identical(lib, "")) {
# Not specified. Check the allowlist.
lib <- ifelse(check_allowlist(info$id), "true", "false")
}
if (identical(lib, "false")) {
# Do not download a binary
NULL
} else if (!identical(lib, "true")) {
# Env var provided an os-version to use, to override our logic.
# We don't validate that this exists. If it doesn't, the download will fail
# and the build will fall back to building from source
lib
} else {
# See if we can find a suitable binary
select_binary()
}
}
check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apache/arrow/main/r/tools/nixlibs-allowlist.txt") {
allowlist <- tryCatch(
# Try a remote allowlist so that we can add/remove without a release
suppressWarnings(readLines(allowed)),
# Fallback to default: allowed only on Ubuntu and CentOS/RHEL
error = function(e) c("ubuntu", "centos", "redhat", "rhel")
)
# allowlist should contain valid regular expressions (plain strings ok too)
any(grepl(paste(allowlist, collapse = "|"), os))
}
select_binary <- function(os = tolower(Sys.info()[["sysname"]]),
arch = tolower(Sys.info()[["machine"]]),
test_program = test_for_curl_and_openssl) {
if (identical(os, "linux") && identical(arch, "x86_64")) {
# We only host x86 linux binaries today
tryCatch(
# Somehow the test program system2 call errors on the sanitizer builds
# so globally handle the possibility that this could fail
{
errs <- compile_test_program(test_program)
determine_binary_from_stderr(errs)
},
error = function(e) {
cat("*** Unable to find libcurl and openssl\n")
NULL
}
)
} else {
# No binary available for arch
cat(sprintf("*** Building on %s %s\n", os, arch))
NULL
}
}
# This tests that curl and openssl are present (bc we can include their headers)
# and it checks for other versions/features and raises errors that we grep for
test_for_curl_and_openssl <- "
#include <ciso646>
#ifdef _LIBCPP_VERSION
#error Using libc++
#endif
#if !( __GLIBC__ >= 2 && __GLIBC_MINOR__ >= 27)
#error glibc version too old
#endif
#include <curl/curl.h>
#include <openssl/opensslv.h>
#if OPENSSL_VERSION_NUMBER < 0x10002000L
#error OpenSSL version too old
#endif
#if OPENSSL_VERSION_NUMBER >= 0x30000000L
#error Using OpenSSL version 3
#endif
"
compile_test_program <- function(code) {
# Note: if we wanted to check for openssl on macOS, we'd have to set the brew
# path as a -I directory. But since we (currently) only run this code to
# determine whether we can download a Linux binary, it's not relevant.
runner <- "`R CMD config CXX17` `R CMD config CPPFLAGS` `R CMD config CXX17FLAGS` `R CMD config CXX17STD` -E -xc++"
suppressWarnings(system2("echo", sprintf('"%s" | %s -', code, runner), stdout = FALSE, stderr = TRUE))
}
# TODO(ARROW-16976): drop "ubuntu-18.04" and just use "centos-7"
# (built with newer devtoolset but older glibc (2.17) for broader compatibility,# like manylinux2014)
determine_binary_from_stderr <- function(errs) {
if (is.null(attr(errs, "status"))) {
# There was no error in compiling: so we found libcurl and openssl > 1.0.2,
# openssl is < 3.0, glibc is >= 2.27, and we're not using a strict libc++
cat("*** Found libcurl and openssl >= 1.0.2\n")
return("ubuntu-18.04")
# Else, check for dealbreakers:
} else if (any(grepl("Using libc++", errs, fixed = TRUE))) {
# Our binaries are all built with GNU stdlib so they fail with libc++
cat("*** Found libc++\n")
return(NULL)
} else if (header_not_found("curl/curl", errs)) {
cat("*** libcurl not found\n")
return(NULL)
} else if (header_not_found("openssl/opensslv", errs)) {
cat("*** openssl not found\n")
return(NULL)
} else if (any(grepl("OpenSSL version too old", errs))) {
cat("*** openssl found but version >= 1.0.2 is required for some features\n")
return(NULL)
# Else, determine which other binary will work
} else if (any(grepl("glibc version too old", errs))) {
# ubuntu-18.04 has glibc 2.27, so even if you install newer compilers
# (e.g. devtoolset on centos) and have curl/openssl, you run into problems
# TODO(ARROW-16976): build binaries with older glibc
cat("*** Checking glibc version\n")
# If we're here, we're on an older OS but with a new enough compiler
# (e.g. CentOS 7 with devtoolset-8)
return("centos-7")
} else if (any(grepl("Using OpenSSL version 3", errs))) {
cat("*** Found libcurl and openssl >= 3.0.0\n")
return("ubuntu-22.04")
}
NULL
}
header_not_found <- function(header, errs) {
regex <- sprintf("[Ee]rror.*%s\\.h", header)
any(grepl(regex, errs))
}
#### start distro ####
distro <- function() {
# The code in this script is a (potentially stale) copy of the distro package
if (requireNamespace("distro", quietly = TRUE)) {
# Use the version from the package, which may be updated from this
return(distro::distro())
}
out <- lsb_release()
if (is.null(out)) {
out <- os_release()
if (is.null(out)) {
out <- system_release()
}
}
if (is.null(out)) {
return(NULL)
}
out$id <- tolower(out$id)
# debian unstable & testing lsb_release `version` don't include numbers but we can map from pretty name
if (is.null(out$version) || out$version %in% c("testing", "unstable")) {
if (grepl("bullseye", out$codename)) {
out$short_version <- "11"
} else if (grepl("bookworm", out$codename)) {
out$short_version <- "12"
}
} else if (out$id == "ubuntu") {
# Keep major.minor version
out$short_version <- sub('^"?([0-9]+\\.[0-9]+).*"?.*$', "\\1", out$version)
} else {
# Only major version number
out$short_version <- sub('^"?([0-9]+).*"?.*$', "\\1", out$version)
}
out
}
lsb_release <- function() {
if (have_lsb_release()) {
list(
id = call_lsb("-is"),
version = call_lsb("-rs"),
codename = call_lsb("-cs")
)
} else {
NULL
}
}
have_lsb_release <- function() nzchar(Sys.which("lsb_release"))
call_lsb <- function(args) system(paste("lsb_release", args), intern = TRUE)
os_release <- function() {
rel_data <- read_os_release()
if (!is.null(rel_data)) {
vals <- as.list(sub('^.*="?(.*?)"?$', "\\1", rel_data))
names(vals) <- sub("^(.*)=.*$", "\\1", rel_data)
out <- list(
id = vals[["ID"]],
version = vals[["VERSION_ID"]]
)
if ("VERSION_CODENAME" %in% names(vals)) {
out$codename <- vals[["VERSION_CODENAME"]]
} else {
# This probably isn't right, maybe could extract codename from pretty name?
out$codename <- vals[["PRETTY_NAME"]]
}
out
} else {
NULL
}
}
read_os_release <- function() {
if (file.exists("/etc/os-release")) {
readLines("/etc/os-release")
}
}
system_release <- function() {
rel_data <- read_system_release()
if (!is.null(rel_data)) {
# Something like "CentOS Linux release 7.7.1908 (Core)"
list(
id = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\1", rel_data),
version = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\2", rel_data),
codename = NA
)
} else {
NULL
}
}
read_system_release <- function() {
if (file.exists("/etc/system-release")) {
readLines("/etc/system-release")[1]
}
}
#### end distro ####
find_local_source <- function() {
# We'll take the first of these that exists
# The first case probably occurs if we're in the arrow git repo
# The second probably occurs if we're installing the arrow R package
cpp_dir_options <- c(
file.path(Sys.getenv("ARROW_SOURCE_HOME", ".."), "cpp"),
"tools/cpp"
)
for (cpp_dir in cpp_dir_options) {
if (file.exists(file.path(cpp_dir, "src/arrow/api.h"))) {
cat(paste0("*** Found local C++ source: '", cpp_dir, "'\n"))
return(cpp_dir)
}
}
NULL
}
env_vars_as_string <- function(env_var_list) {
# Do some basic checks on env_var_list:
# Check that env_var_list has names, that those names are valid POSIX
# environment variables, and that none of the values contain `'`.
stopifnot(
length(env_var_list) == length(names(env_var_list)),
all(grepl("^[^0-9]", names(env_var_list))),
all(grepl("^[A-Z0-9_]+$", names(env_var_list))),
!any(grepl("'", env_var_list, fixed = TRUE))
)
env_var_string <- paste0(names(env_var_list), "='", env_var_list, "'", collapse = " ")
if (nchar(env_var_string) > 30000) {
# This could happen if the full paths in *_SOURCE_URL were *very* long.
# A more formal check would look at getconf ARG_MAX, but this shouldn't matter
cat("*** Warning: Environment variables are very long. This could cause issues on some shells.\n")
}
env_var_string
}
R_CMD_config <- function(var) {
tools::Rcmd(paste("config", var), stdout = TRUE)
}
build_libarrow <- function(src_dir, dst_dir) {
# We'll need to compile R bindings with these libs, so delete any .o files
system("rm src/*.o", ignore.stdout = TRUE, ignore.stderr = TRUE)
# Set up make for parallel building
makeflags <- Sys.getenv("MAKEFLAGS")
if (makeflags == "") {
# CRAN policy says not to use more than 2 cores during checks
# If you have more and want to use more, set MAKEFLAGS
ncores <- min(parallel::detectCores(), 2)
makeflags <- sprintf("-j%s", ncores)
Sys.setenv(MAKEFLAGS = makeflags)
}
if (!quietly) {
cat("*** Building with MAKEFLAGS=", makeflags, "\n")
}
# Check for libarrow build dependencies:
# * cmake
cmake <- ensure_cmake()
# Optionally build somewhere not in tmp so we can dissect the build if it fails
debug_dir <- Sys.getenv("LIBARROW_DEBUG_DIR")
if (nzchar(debug_dir)) {
build_dir <- debug_dir
} else {
# But normally we'll just build in a tmp dir
build_dir <- tempfile()
}
options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir))
env_var_list <- c(
SOURCE_DIR = src_dir,
BUILD_DIR = build_dir,
DEST_DIR = dst_dir,
CMAKE = cmake,
# EXTRA_CMAKE_FLAGS will often be "", but it's convenient later to have it defined
EXTRA_CMAKE_FLAGS = Sys.getenv("EXTRA_CMAKE_FLAGS"),
# Make sure we build with the same compiler settings that R is using
# Exception: if you've added ccache to CC and CXX following
# http://dirk.eddelbuettel.com/blog/2017/11/27/, some libarrow
# third party dependencies will error on compilation. But don't
# worry, `ARROW_USE_CCACHE=ON` by default, so if ccache
# is found, it will be used by the libarrow build, and this does
# not affect how R compiles the arrow bindings.
CC = sub("^.*ccache", "", R_CMD_config("CC")),
CXX = paste(sub("^.*ccache", "", R_CMD_config("CXX17")), R_CMD_config("CXX17STD")),
# CXXFLAGS = R_CMD_config("CXX17FLAGS"), # We don't want the same debug symbols
LDFLAGS = R_CMD_config("LDFLAGS")
)
env_var_list <- with_cloud_support(env_var_list)
# turn_off_all_optional_features() needs to happen after
# with_cloud_support(), since it might turn features ON.
thirdparty_deps_unavailable <- !download_ok &&
!dir.exists(thirdparty_dependency_dir) &&
!env_is("ARROW_DEPENDENCY_SOURCE", "system")
do_minimal_build <- env_is("LIBARROW_MINIMAL", "true")
if (do_minimal_build) {
env_var_list <- turn_off_all_optional_features(env_var_list)
} else if (thirdparty_deps_unavailable) {
cat(paste0(
"*** Building C++ library from source, but downloading thirdparty dependencies\n",
" is not possible, so this build will turn off all thirdparty features.\n",
" See installation guide for details:\n",
" https://arrow.apache.org/docs/r/articles/install.html\n"
))
env_var_list <- turn_off_all_optional_features(env_var_list)
} else if (dir.exists(thirdparty_dependency_dir)) {
# Add the *_SOURCE_URL env vars
env_var_list <- set_thirdparty_urls(env_var_list)
}
env_vars <- env_vars_as_string(env_var_list)
cat("**** arrow", ifelse(quietly, "", paste("with", env_vars)), "\n")
status <- suppressWarnings(system(
paste(env_vars, "inst/build_arrow_static.sh"),
ignore.stdout = quietly, ignore.stderr = quietly
))
if (status != 0) {
# It failed :(
cat(
"**** Error building Arrow C++.",
ifelse(env_is("ARROW_R_DEV", "true"), "", "Re-run with ARROW_R_DEV=true for debug information."),
"\n"
)
}
invisible(status)
}
ensure_cmake <- function() {
cmake <- find_cmake()
if (is.null(cmake)) {
# If not found, download it
cat("**** cmake\n")
CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.21.4")
if (tolower(Sys.info()[["sysname"]]) %in% "darwin") {
postfix <- "-macos-universal.tar.gz"
} else if (tolower(Sys.info()[["machine"]]) %in% c("arm64", "aarch64")) {
postfix <- "-linux-aarch64.tar.gz"
} else if (tolower(Sys.info()[["machine"]]) == "x86_64") {
postfix <- "-linux-x86_64.tar.gz"
} else {
stop(paste0(
"*** cmake was not found locally.\n",
" Please make sure cmake >= 3.10 is installed and available on your PATH.\n"
))
}
cmake_binary_url <- paste0(
"https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION,
"/cmake-", CMAKE_VERSION, postfix
)
cmake_tar <- tempfile()
cmake_dir <- tempfile()
download_successful <- try_download(cmake_binary_url, cmake_tar)
if (!download_successful) {
cat(paste0(
"*** cmake was not found locally and download failed.\n",
" Make sure cmake >= 3.10 is installed and available on your PATH,\n",
" or download ", cmake_binary_url, "\n",
" and define the CMAKE environment variable.\n"
))
}
untar(cmake_tar, exdir = cmake_dir)
unlink(cmake_tar)
options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
cmake <- paste0(
cmake_dir,
"/cmake-", CMAKE_VERSION, sub(".tar.gz", "", postfix, fixed = TRUE),
"/bin/cmake"
)
} else {
# Show which one we found
# Full source builds will always show "cmake" in the logs
cat(sprintf("**** cmake: %s\n", cmake))
}
cmake
}
find_cmake <- function(paths = c(
Sys.getenv("CMAKE"),
Sys.which("cmake"),
Sys.which("cmake3")
),
version_required = "3.10") {
# Given a list of possible cmake paths, return the first one that exists and is new enough
# version_required should be a string or packageVersion; numeric version
# can be misleading (e.g. 3.10 is actually 3.1)
for (path in paths) {
if (nzchar(path) && cmake_version(path) >= version_required) {
# Sys.which() returns a named vector, but that plays badly with c() later
names(path) <- NULL
return(path)
}
}
# If none found, return NULL
NULL
}
cmake_version <- function(cmd = "cmake") {
tryCatch(
{
raw_version <- system(paste(cmd, "--version"), intern = TRUE, ignore.stderr = TRUE)
pat <- ".* ([0-9\\.]+).*?"
which_line <- grep(pat, raw_version)
package_version(sub(pat, "\\1", raw_version[which_line]))
},
error = function(e) {
return(0)
}
)
}
turn_off_all_optional_features <- function(env_var_list) {
# Because these are done as environment variables (as opposed to build flags),
# setting these to "OFF" overrides any previous setting. We don't need to
# check the existing value.
# Some features turn on other features (e.g. substrait -> protobuf),
# So the list of things to turn off is long. See:
# https://github.com/apache/arrow/blob/main/cpp/cmake_modules/ThirdpartyToolchain.cmake#L275
turn_off <- c(
"ARROW_MIMALLOC" = "OFF",
"ARROW_JEMALLOC" = "OFF",
"ARROW_JSON" = "OFF",
"ARROW_PARQUET" = "OFF", # depends on thrift
"ARROW_DATASET" = "OFF", # depends on parquet
"ARROW_S3" = "OFF",
"ARROW_GCS" = "OFF",
"ARROW_WITH_GOOGLE_CLOUD_CPP" = "OFF",
"ARROW_WITH_NLOHMANN_JSON" = "OFF",
"ARROW_SUBSTRAIT" = "OFF",
"ARROW_WITH_PROTOBUF" = "OFF",
"ARROW_WITH_BROTLI" = "OFF",
"ARROW_WITH_BZ2" = "OFF",
"ARROW_WITH_LZ4" = "OFF",
"ARROW_WITH_SNAPPY" = "OFF",
"ARROW_WITH_ZLIB" = "OFF",
"ARROW_WITH_ZSTD" = "OFF",
"ARROW_WITH_RE2" = "OFF",
"ARROW_WITH_UTF8PROC" = "OFF",
# The syntax to turn off XSIMD is different.
# Pull existing value of EXTRA_CMAKE_FLAGS first (must be defined)
"EXTRA_CMAKE_FLAGS" = paste(
env_var_list[["EXTRA_CMAKE_FLAGS"]],
"-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE"
)
)
# Create a new env_var_list, with the values of turn_off set.
# replace() also adds new values if they didn't exist before
replace(env_var_list, names(turn_off), turn_off)
}
get_component_names <- function() {
if (!isTRUE(Sys.which("bash") != "")) {
stop("nixlibs.R requires bash to be installed and available in your PATH")
}
deps_bash <- "tools/download_dependencies_R.sh"
csv_tempfile <- tempfile(fileext = ".csv")
deps_bash_success <- system2("bash", deps_bash, stdout = csv_tempfile) == 0
if (!deps_bash_success) {
stop("Failed to run download_dependencies_R.sh")
}
deps_df <- read.csv(csv_tempfile,
stringsAsFactors = FALSE, row.names = NULL, quote = "'"
)
stopifnot(
names(deps_df) == c("env_varname", "filename"),
nrow(deps_df) > 0
)
deps_df
}
set_thirdparty_urls <- function(env_var_list) {
# This function does *not* check if existing *_SOURCE_URL variables are set.
# The directory tools/thirdparty_dependencies is created by
# create_package_with_all_dependencies() and saved in the tar file.
deps_df <- get_component_names()
dep_dir <- normalizePath(thirdparty_dependency_dir, mustWork = TRUE)
deps_df$full_filenames <- file.path(dep_dir, deps_df$filename)
files_exist <- file.exists(deps_df$full_filenames)
if (!any(files_exist)) {
stop("Dependency tar files did not exist in '", dep_dir, "'")
}
# Only set env var for files that are in thirdparty_dependency_dir
# (allows for a user to download a limited set of tar files, if they wanted)
deps_df <- deps_df[files_exist, ]
env_var_list <- replace(env_var_list, deps_df$env_varname, deps_df$full_filenames)
if (!quietly) {
env_var_list <- replace(env_var_list, "ARROW_VERBOSE_THIRDPARTY_BUILD", "ON")
}
env_var_list
}
is_feature_requested <- function(env_varname, default = env_is("LIBARROW_MINIMAL", "false")) {
env_value <- tolower(Sys.getenv(env_varname))
if (identical(env_value, "off")) {
# If e.g. ARROW_MIMALLOC=OFF explicitly, override default
requested <- FALSE
} else if (identical(env_value, "on")) {
requested <- TRUE
} else {
requested <- default
}
requested
}
with_cloud_support <- function(env_var_list) {
arrow_s3 <- is_feature_requested("ARROW_S3")
arrow_gcs <- is_feature_requested("ARROW_GCS")
if (arrow_s3 || arrow_gcs) {
# User wants S3 or GCS support.
# Make sure that we have curl and openssl system libs
feats <- c(
if (arrow_s3) "S3",
if (arrow_gcs) "GCS"
)
start_msg <- paste(feats, collapse = "/")
off_flags <- paste("ARROW_", feats, "=OFF", sep = "", collapse = " and ")
print_warning <- function(msg) {
# Utility to assemble warning message in the console
cat("**** ", start_msg, " support ", msg, "; building with ", off_flags, "\n")
}
# Check the features
# This duplicates what we do with the test program above when we check
# capabilities for using binaries. We could consider consolidating this
# logic, though these use cmake in order to match exactly what we do in the
# libarrow build, and maybe that increases the fidelity.
if (!cmake_find_package("CURL", NULL, env_var_list)) {
# curl on macos should be installed, so no need to alter this for macos
# TODO: check for apt/yum/etc. and message the right thing?
print_warning("requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb)")
arrow_s3 <- FALSE
arrow_gcs <- FALSE
} else if (!cmake_find_package("OpenSSL", "1.0.2", env_var_list)) {
print_warning("requires version >= 1.0.2 of openssl-devel (rpm), libssl-dev (deb), or openssl (brew)")
arrow_s3 <- FALSE
arrow_gcs <- FALSE
}
}
# Update the build flags
env_var_list <- replace(env_var_list, "ARROW_S3", ifelse(arrow_s3, "ON", "OFF"))
replace(env_var_list, "ARROW_GCS", ifelse(arrow_gcs, "ON", "OFF"))
}
cmake_find_package <- function(pkg, version = NULL, env_var_list) {
td <- tempfile()
dir.create(td)
options(.arrow.cleanup = c(getOption(".arrow.cleanup"), td))
find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)")
writeLines(find_package, file.path(td, "CMakeLists.txt"))
env_vars <- env_vars_as_string(env_var_list)
cmake_cmd <- paste0(
"export ", env_vars,
" && cd ", td,
" && $CMAKE ",
" -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON",
" -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON",
" ."
)
system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0
}
#####
if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) {
# If we're working in a local checkout and have already built the libs, we
# don't need to do anything. Otherwise,
# (1) Look for a prebuilt binary for this version
bin_file <- src_dir <- NULL
if (!identical(Sys.getenv("ARROW_DOWNLOADED_BINARIES"), "")) {
bin_zip <- Sys.getenv("ARROW_DOWNLOADED_BINARIES")
cat(sprintf("*** Using pre-downloaded zip for libarrow binaries: %s\n", bin_zip))
if (file.exists(bin_zip)) {
bin_file <- tempfile()
file.copy(bin_zip, bin_file)
} else {
cat(sprintf("*** File not found: %s ($ARROW_DOWNLOADED_BINARIES)\n", bin_zip))
bin_file <- NULL
}
} else if (download_ok) {
binary_flavor <- identify_binary()
if (!is.null(binary_flavor)) {
# The env vars say we can, and we've determined a lib that should work
bin_file <- download_binary(binary_flavor)
}
}
if (!is.null(bin_file)) {
# Extract them
dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE)
unzip(bin_file, exdir = dst_dir)
unlink(bin_file)
} else if (build_ok) {
# (2) Find source and build it
src_dir <- find_local_source()
if (!is.null(src_dir)) {
cat(paste0(
"*** Building libarrow from source\n",
" For build options and troubleshooting, see the install guide:\n",
" https://arrow.apache.org/docs/r/articles/install.html\n"
))
build_libarrow(src_dir, dst_dir)
} else {
cat("*** Proceeding without libarrow (no local source)\n")
}
} else {
cat("*** Proceeding without libarrow (build not authorized)\n")
}
}