# blob: 0dab4b3d61e483cc6100db5ebeac4d96686efefd [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Script setup. The first trailing command-line argument is the Arrow version;
# it determines both the download URLs and the install directory `dst_dir`.
args <- commandArgs(TRUE)
VERSION <- args[1]
dst_dir <- paste0("libarrow/arrow-", VERSION)
arrow_repo <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/"
# Compare against a version *string*: a bare number is first pushed through
# as.character(), so e.g. 3.10 would silently be read as "3.1".
if (getRversion() < "3.4.0" && is.null(getOption("download.file.method"))) {
  # default method doesn't work on R 3.3, nor does libcurl
  options(download.file.method = "wget")
}
options(.arrow.cleanup = character()) # To collect dirs to rm on exit
on.exit(unlink(getOption(".arrow.cleanup")))
# Case-insensitive test of an environment variable against an expected
# (lowercase) `value`. An unset variable reads as "".
env_is <- function(var, value) {
  current <- Sys.getenv(var)
  identical(tolower(current), value)
}
# How the two main switches combine:
# * LIBARROW_DOWNLOAD=false, build allowed: only build with local git checkout
# * download allowed, LIBARROW_BUILD=false: only use prebuilt binary, if found
# * both turned off: get the arrow-without-arrow package
# Building and downloading are both permitted unless explicitly disabled
build_ok <- !env_is("LIBARROW_BUILD", "false")
download_ok <- !env_is("LIBARROW_DOWNLOAD", "false")
# Prebuilt binaries are opt-in: unset (or "false") means not OK
binary_ok <- tolower(Sys.getenv("LIBARROW_BINARY", "false")) != "false"
# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
quietly <- !env_is("ARROW_R_DEV", "true")
# Attempt to fetch `from_url` into `to_file`.
# Returns TRUE when download.file() reports success (status 0) and FALSE when
# it raises an error; warnings are suppressed, and error output is hidden
# whenever `quietly` is TRUE.
try_download <- function(from_url, to_file) {
  result <- try(
    suppressWarnings(download.file(from_url, to_file, quiet = quietly)),
    silent = quietly
  )
  if (inherits(result, "try-error")) {
    return(FALSE)
  }
  # download.file() signals success with a 0 status code
  result == 0
}
# Try to fetch a prebuilt libarrow zip for `os` (by default whatever
# identify_os() reports). Returns the path of the downloaded file, or NULL
# when no OS was identified or no matching binary could be retrieved.
download_binary <- function(os = identify_os()) {
  zip_path <- tempfile()
  if (is.null(os)) {
    return(NULL)
  }
  # See if we can map this os-version to one we have binaries for
  os <- find_available_binary(os)
  zip_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
  if (!try_download(zip_url, zip_path)) {
    cat(sprintf("*** No C++ binaries found for %s\n", os))
    return(NULL)
  }
  cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
  if (!identical(os, "centos-7")) {
    # centos-7 uses gcc 4.8 so the binary doesn't have ARROW_S3=ON but the others do
    # TODO: actually check for system requirements?
    cat("**** Binary package requires libcurl and openssl\n")
    cat("**** If installation fails, retry after installing those system requirements\n")
  }
  zip_path
}
# Decide which flavor of prebuilt binary (if any) should be downloaded.
# With the env var LIBARROW_BINARY unset or "FALSE", no precompiled library is
# downloaded. Setting it changes that:
# * `TRUE` (not case-sensitive) means "detect the current OS", and
# * any other string is taken verbatim as a "distro-version" whose binaries
#   are expected to work on this machine.
# Returns the "distro-version" string, or NULL when no binary should be used.
identify_os <- function(os = Sys.getenv("LIBARROW_BINARY", Sys.getenv("LIBARROW_DOWNLOAD"))) {
  requested <- tolower(os)
  if (requested %in% c("", "false")) {
    # Env var says not to download a binary
    return(NULL)
  }
  if (!identical(requested, "true")) {
    # Env var provided an os-version to use--maybe you're on Ubuntu 18.10 but
    # we only build for 18.04 and that's fine--so use what the user set
    return(os)
  }
  # "true": work out the distribution ourselves
  linux <- distro()
  if (is.null(linux)) {
    cat("*** Unable to identify current OS/version\n")
    return(NULL)
  }
  paste(linux$id, linux$short_version, sep = "-")
}
#### start distro ####
# Identify the current Linux distribution.
# Returns NULL when nothing could be detected, otherwise a list with at least
# `id` (lowercased) and `short_version` (major.minor for ubuntu, "11" for
# debian bullseye, major only for everything else), plus whatever fields the
# detection backend supplied.
distro <- function() {
  # The code in this script is a (potentially stale) copy of the distro package
  if (requireNamespace("distro", quietly = TRUE)) {
    # Use the version from the package, which may be updated from this
    return(distro::distro())
  }
  # Try the detection backends in order of preference
  out <- lsb_release()
  if (is.null(out)) {
    out <- os_release()
  }
  if (is.null(out)) {
    out <- system_release()
  }
  if (is.null(out)) {
    return(NULL)
  }

  out$id <- tolower(out$id)
  # A backend may leave `codename` or `id` missing/empty. Feeding a zero-length
  # value straight into if() would abort the whole script, so reduce both
  # checks to a definite TRUE/FALSE first.
  is_bullseye <- length(out$codename) == 1L && grepl("bullseye", out$codename)
  is_ubuntu <- length(out$id) == 1L && !is.na(out$id) && out$id == "ubuntu"
  if (is_bullseye) {
    # debian unstable doesn't include a number but we can map from pretty name
    out$short_version <- "11"
  } else if (is_ubuntu) {
    # Keep major.minor version
    out$short_version <- sub('^"?([0-9]+\\.[0-9]+).*"?.*$', "\\1", out$version)
  } else {
    # Only major version number
    out$short_version <- sub('^"?([0-9]+).*"?.*$', "\\1", out$version)
  }
  out
}
# Query the `lsb_release` tool for distributor id, release number and codename.
# Returns a list(id, version, codename), or NULL when the tool is unavailable.
lsb_release <- function() {
  if (!have_lsb_release()) {
    return(NULL)
  }
  # One lsb_release invocation per field, in this order
  flags <- c(id = "-is", version = "-rs", codename = "-cs")
  lapply(flags, call_lsb)
}
# TRUE when an `lsb_release` executable can be located via Sys.which().
have_lsb_release <- function() {
  lsb_path <- Sys.which("lsb_release")
  nzchar(lsb_path)
}
# Run `lsb_release <args>` and return its standard output, one element per line.
call_lsb <- function(args) {
  command <- paste("lsb_release", args)
  system(command, intern = TRUE)
}
# Derive distribution info from the lines returned by read_os_release().
# Returns a list with `id` (ID), `version` (VERSION_ID) and `codename`, or
# NULL when read_os_release() yields NULL.
os_release <- function() {
  lines <- read_os_release()
  if (is.null(lines)) {
    return(NULL)
  }
  # Each line is KEY=value or KEY="value": split into a list named by key
  fields <- as.list(sub('^.*="?(.*?)"?$', "\\1", lines))
  names(fields) <- sub("^(.*)=.*$", "\\1", lines)
  # Prefer VERSION_CODENAME; fall back to PRETTY_NAME when it is absent.
  # This probably isn't right, maybe could extract codename from pretty name?
  if ("VERSION_CODENAME" %in% names(fields)) {
    codename_key <- "VERSION_CODENAME"
  } else {
    codename_key <- "PRETTY_NAME"
  }
  info <- list(id = fields[["ID"]], version = fields[["VERSION_ID"]])
  info$codename <- fields[[codename_key]]
  info
}
# Return the lines of /etc/os-release, or NULL (invisibly) if the file does
# not exist.
read_os_release <- function() {
  os_release_file <- "/etc/os-release"
  if (!file.exists(os_release_file)) {
    return(invisible(NULL))
  }
  readLines(os_release_file)
}
# Derive distribution info from the line returned by read_system_release().
# Returns list(id, version, codename = NA), or NULL when nothing was read.
system_release <- function() {
  first_line <- read_system_release()
  if (is.null(first_line)) {
    return(NULL)
  }
  # Something like "CentOS Linux release 7.7.1908 (Core)":
  # group 1 is the leading word, group 2 the last space-preceded version number
  release_pattern <- "^([a-zA-Z]+) .* ([0-9.]+).*$"
  list(
    id = sub(release_pattern, "\\1", first_line),
    version = sub(release_pattern, "\\2", first_line),
    codename = NA
  )
}
# Return the first line of /etc/system-release, or NULL when the file is
# missing or empty.
# The caller treats NULL as "no information available", so a missing file must
# not surface as a readLines() error.
read_system_release <- function() {
  release_file <- "/etc/system-release"
  if (!file.exists(release_file)) {
    return(NULL)
  }
  first_line <- utils::head(readLines(release_file), 1)
  if (length(first_line) == 0) {
    return(NULL)
  }
  first_line
}
#### end distro ####
# Map a detected "distro-version" onto one that prebuilt binaries exist for.
# Returns the mapped name when the lookup table has exactly one non-NA match,
# otherwise `os` unchanged (including when the table cannot be fetched).
find_available_binary <- function(os) {
  # Download a csv that maps one to the other, columns "actual" and "use_this"
  map_url <- "https://raw.githubusercontent.com/ursa-labs/arrow-r-nightly/master/linux/distro-map.csv"
  distro_map <- try(utils::read.csv(map_url, stringsAsFactors = FALSE), silent = quietly)
  if (inherits(distro_map, "try-error") || !(os %in% distro_map$actual)) {
    return(os)
  }
  candidate <- distro_map$use_this[distro_map$actual == os]
  # Just some sanity checking
  if (length(candidate) == 1 && !is.na(candidate)) {
    cat(sprintf("*** Using %s binary for %s\n", candidate, os))
    return(candidate)
  }
  os
}
# Download and unpack the Arrow C++ source for VERSION.
# Returns the path of the extracted `cpp` directory (also registered in the
# `.arrow.cleanup` option), or NULL when no source could be obtained.
download_source <- function() {
  archive <- tempfile()
  extract_dir <- tempfile()
  # Stays pointing at a non-existent path unless one of the downloads works
  src_dir <- extract_dir
  if (bintray_download(archive)) {
    # First try from bintray
    cat("*** Successfully retrieved C++ source\n")
    unzip(archive, exdir = extract_dir)
    unlink(archive)
    src_dir <- paste0(extract_dir, "/cpp")
  } else if (apache_download(archive)) {
    # If that fails, try for an official release
    cat("*** Successfully retrieved C++ source\n")
    untar(archive, exdir = extract_dir)
    unlink(archive)
    src_dir <- paste0(extract_dir, "/apache-arrow-", VERSION, "/cpp")
  }
  if (!dir.exists(src_dir)) {
    return(NULL)
  }
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
  # These scripts need to be executable
  chmod_cmd <- sprintf("chmod 755 %s/build-support/*.sh", src_dir)
  system(chmod_cmd, ignore.stdout = quietly, ignore.stderr = quietly)
  src_dir
}
# Try to download the source zip for VERSION from `arrow_repo` into `destfile`.
# Returns whatever try_download() reports.
bintray_download <- function(destfile) {
  try_download(
    paste0(arrow_repo, "src/arrow-", VERSION, ".zip"),
    destfile
  )
}
# Try to download the official release tarball for VERSION into `destfile`.
# The mirror redirector is tried `n_mirrors` times, then the backup host once;
# attempts stop at the first success. Returns the outcome of the last attempt.
apache_download <- function(destfile, n_mirrors = 3) {
  release_path <- paste0("arrow/arrow-", VERSION, "/apache-arrow-", VERSION, ".tar.gz")
  # This returns a different mirror each time
  mirror_base <- "https://www.apache.org/dyn/closer.lua?action=download&filename="
  # The backup
  backup_base <- "https://downloads.apache.org/"
  candidates <- paste0(c(rep(mirror_base, n_mirrors), backup_base), release_path)

  outcome <- FALSE
  attempt <- 0L
  while (!outcome && attempt < length(candidates)) {
    attempt <- attempt + 1L
    outcome <- try_download(candidates[[attempt]], destfile)
  }
  outcome
}
# Look for Arrow C++ sources under `arrow_home` (ARROW_HOME, default "..").
# Returns "<arrow_home>/cpp" when cpp/src/arrow/api.h is present, else NULL.
find_local_source <- function(arrow_home = Sys.getenv("ARROW_HOME", "..")) {
  marker <- paste0(arrow_home, "/cpp/src/arrow/api.h")
  if (!file.exists(marker)) {
    return(NULL)
  }
  # We're in a git checkout of arrow, so we can build it
  cat("*** Found local C++ source\n")
  paste0(arrow_home, "/cpp")
}
# Build libarrow from the C++ sources in `src_dir` and install into `dst_dir`
# by running inst/build_arrow_static.sh with the compiler settings reported by
# `R CMD config`.
# Returns (invisibly) the exit status of the build script; 0 means success.
build_libarrow <- function(src_dir, dst_dir) {
  # We'll need to compile R bindings with these libs, so delete any .o files
  system("rm src/*.o", ignore.stdout = TRUE, ignore.stderr = TRUE)
  # Set up make for parallel building
  makeflags <- Sys.getenv("MAKEFLAGS")
  if (makeflags == "") {
    # CRAN policy says not to use more than 2 cores during checks
    # If you have more and want to use more, set MAKEFLAGS
    # detectCores() can return NA when the count is unknown; na.rm keeps that
    # from turning into a bogus "-jNA"
    ncores <- min(parallel::detectCores(), 2, na.rm = TRUE)
    makeflags <- sprintf("-j%s", ncores)
    Sys.setenv(MAKEFLAGS = makeflags)
  }
  if (!quietly) {
    cat("*** Building with MAKEFLAGS=", makeflags, "\n")
  }
  # Check for libarrow build dependencies:
  # * cmake
  cmake <- ensure_cmake()
  # Optionally build somewhere not in tmp so we can dissect the build if it fails
  debug_dir <- Sys.getenv("LIBARROW_DEBUG_DIR")
  if (nzchar(debug_dir)) {
    build_dir <- debug_dir
  } else {
    # But normally we'll just build in a tmp dir
    build_dir <- tempfile()
  }
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir))
  # Ask `R CMD config` for the value of a single variable
  R_CMD_config <- function(var) {
    # Compare with a version string rather than a bare number, which would be
    # coerced through as.character() first
    if (getRversion() < "3.4.0") {
      # var names were called CXX1X instead of CXX11
      var <- sub("^CXX11", "CXX1X", var)
    }
    # tools::Rcmd introduced R 3.3
    tools::Rcmd(paste("config", var), stdout = TRUE)
  }
  env_var_list <- c(
    SOURCE_DIR = src_dir,
    BUILD_DIR = build_dir,
    DEST_DIR = dst_dir,
    CMAKE = cmake,
    # Make sure we build with the same compiler settings that R is using
    CC = R_CMD_config("CC"),
    CXX = paste(R_CMD_config("CXX11"), R_CMD_config("CXX11STD")),
    # CXXFLAGS = R_CMD_config("CXX11FLAGS"), # We don't want the same debug symbols
    LDFLAGS = R_CMD_config("LDFLAGS")
  )
  # Render as NAME="value" pairs to prefix the shell command with
  env_vars <- paste0(names(env_var_list), '="', env_var_list, '"', collapse = " ")
  env_vars <- with_s3_support(env_vars)
  env_vars <- with_mimalloc(env_vars)
  if (tolower(Sys.info()[["sysname"]]) %in% "sunos") {
    # jemalloc doesn't seem to build on Solaris
    env_vars <- paste(env_vars, "ARROW_JEMALLOC=OFF")
  }
  # Only spell out the full environment when not running quietly
  build_details <- if (quietly) "" else paste("with", env_vars)
  cat("**** arrow", build_details, "\n")
  status <- system(
    paste(env_vars, "inst/build_arrow_static.sh"),
    ignore.stdout = quietly, ignore.stderr = quietly
  )
  if (status != 0) {
    # It failed :(
    cat("**** Error building Arrow C++. Re-run with ARROW_R_DEV=true for debug information.\n")
  }
  invisible(status)
}
# Return the path of a usable cmake executable.
# Candidates are $CMAKE, `cmake` and `cmake3` (filtered by find_cmake()); when
# none qualifies, a Linux x86_64 release tarball (CMAKE_VERSION, default
# 3.19.2) is downloaded and unpacked into a temp dir that is registered for
# cleanup. The path inside that tarball is returned even if the download
# failed, in which case a diagnostic is printed first.
ensure_cmake <- function() {
  cmake <- find_cmake(c(
    Sys.getenv("CMAKE"),
    Sys.which("cmake"),
    Sys.which("cmake3")
  ))
  if (is.null(cmake)) {
    # If not found, download it
    cat("**** cmake\n")
    CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.19.2")
    cmake_binary_url <- paste0(
      "https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION,
      "/cmake-", CMAKE_VERSION, "-Linux-x86_64.tar.gz"
    )
    cmake_tar <- tempfile()
    cmake_dir <- tempfile()
    if (try_download(cmake_binary_url, cmake_tar)) {
      untar(cmake_tar, exdir = cmake_dir)
      options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
    } else {
      # Don't try to unpack a file that was never written; say what went wrong
      # so the later build failure is explainable
      cat("**** cmake was not found locally and could not be downloaded from ", cmake_binary_url, "\n", sep = "")
      cat("**** Install cmake or point the CMAKE environment variable at one, then retry\n")
    }
    unlink(cmake_tar)
    cmake <- paste0(
      cmake_dir,
      "/cmake-", CMAKE_VERSION, "-Linux-x86_64",
      "/bin/cmake"
    )
  }
  cmake
}
# Given candidate cmake paths, return the first non-empty one whose version is
# at least `version_required`, or NULL if none qualifies.
# `version_required` may be a number, a string or a version object.
find_cmake <- function(paths, version_required = 3.2) {
  # Normalise through a string so the comparison below is always
  # version-vs-version rather than relying on implicit numeric coercion
  required <- package_version(as.character(version_required))
  for (path in paths) {
    if (!nzchar(path)) {
      next
    }
    found <- cmake_version(path)
    # cmake_version() may hand back a non-version sentinel or an empty result
    # when the command fails or prints something unexpected; treat anything
    # that is not exactly one version as "not usable" instead of letting an
    # NA/zero-length condition abort the script.
    usable <- inherits(found, "numeric_version") &&
      length(found) == 1L &&
      isTRUE(found >= required)
    if (usable) {
      # Sys.which() returns a named vector, but that plays badly with c() later
      names(path) <- NULL
      return(path)
    }
  }
  # If none found, return NULL
  NULL
}
# Run `<cmd> --version` and return the reported version as a package_version.
# Returns the number 0 when the command cannot be run or its output contains
# no recognisable version, so callers always receive a single value.
cmake_version <- function(cmd = "cmake") {
  tryCatch(
    {
      raw_version <- system(paste(cmd, "--version"), intern = TRUE, ignore.stderr = TRUE)
      pat <- ".* ([0-9\\.]+).*?"
      which_line <- grep(pat, raw_version)
      if (length(which_line) == 0) {
        # Nothing that looks like a version: same sentinel as a failed command
        0
      } else {
        # Only the first matching line is used, so stray later lines
        # cannot turn the result into a multi-element vector
        package_version(sub(pat, "\\1", raw_version[which_line[1]]))
      }
    },
    error = function(e) 0
  )
}
# Append ARROW_S3=ON or ARROW_S3=OFF to `env_vars`.
# S3 is requested via ARROW_S3=ON or LIBARROW_MINIMAL=false, and is only
# switched on when the toolchain and system libraries allow it.
with_s3_support <- function(env_vars) {
  s3_requested <- toupper(Sys.getenv("ARROW_S3")) == "ON" || tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
  s3_flag <- "ARROW_S3=OFF"
  if (s3_requested) {
    # User wants S3 support. If they're using gcc, let's make sure the version is >= 4.9
    # and make sure that we have curl and openssl system libs
    if (isTRUE(cmake_gcc_version(env_vars) < "4.9")) {
      cat("**** S3 support not available for gcc < 4.9; building with ARROW_S3=OFF\n")
    } else if (!cmake_find_package("CURL", NULL, env_vars)) {
      cat("**** S3 support requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb); building with ARROW_S3=OFF\n")
    } else if (!cmake_find_package("OpenSSL", "1.0.2", env_vars)) {
      cat("**** S3 support requires openssl-devel (rpm) or libssl-dev (deb), version >= 1.0.2; building with ARROW_S3=OFF\n")
    } else {
      # Every prerequisite checked out
      s3_flag <- "ARROW_S3=ON"
    }
  }
  paste(env_vars, s3_flag)
}
# Append ARROW_MIMALLOC=ON or ARROW_MIMALLOC=OFF to `env_vars`.
# mimalloc is requested via ARROW_MIMALLOC=ON or LIBARROW_MINIMAL=false, and is
# turned off again when the compiler is gcc older than 4.9.
with_mimalloc <- function(env_vars) {
  mimalloc_requested <- toupper(Sys.getenv("ARROW_MIMALLOC")) == "ON" || tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
  mimalloc_flag <- "ARROW_MIMALLOC=OFF"
  if (mimalloc_requested) {
    # User wants mimalloc. If they're using gcc, let's make sure the version is >= 4.9
    if (isTRUE(cmake_gcc_version(env_vars) < "4.9")) {
      cat("**** mimalloc support not available for gcc < 4.9; building with ARROW_MIMALLOC=OFF\n")
    } else {
      mimalloc_flag <- "ARROW_MIMALLOC=ON"
    }
  }
  paste(env_vars, mimalloc_flag)
}
# Report the gcc version cmake would compile C++ with, as a package_version.
# This function returns NA if using a non-gcc compiler
# Always enclose calls to it in isTRUE() or isFALSE()
cmake_gcc_version <- function(env_vars) {
  compiler <- cmake_cxx_compiler_vars(env_vars)
  uses_gcc <- identical(compiler[["CMAKE_CXX_COMPILER_ID"]], "GNU")
  if (uses_gcc) {
    package_version(compiler[["CMAKE_CXX_COMPILER_VERSION"]])
  } else {
    NA
  }
}
# Run `$CMAKE --system-information` with `env_vars` exported and return, as a
# named list, every reported variable whose name starts with
# CMAKE_CXX_COMPILER.
cmake_cxx_compiler_vars <- function(env_vars) {
  command <- paste("export", env_vars, "&& $CMAKE --system-information")
  output <- system(command, intern = TRUE)
  # Keep only the `NAME value` / `NAME "value"` lines
  var_lines <- grep("^[A-Z_]* .*$", output, value = TRUE)
  var_names <- sub("^(.*?) .*$", "\\1", var_lines)
  var_values <- as.list(sub('^.*? "?(.*?)"?$', "\\1", var_lines))
  names(var_values) <- var_names
  wanted <- grepl("^CMAKE_CXX_COMPILER_?", var_names)
  var_values[wanted]
}
# Check whether cmake can locate package `pkg` (optionally at `version`).
# A one-line CMakeLists.txt is written to a fresh temp dir (registered in the
# `.arrow.cleanup` option) and `$CMAKE` is run on it with `env_vars` exported.
# Returns TRUE when that command exits with status 0.
cmake_find_package <- function(pkg, version = NULL, env_vars) {
  project_dir <- tempfile()
  dir.create(project_dir)
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), project_dir))
  cmakelists <- file.path(project_dir, "CMakeLists.txt")
  writeLines(paste0("find_package(", pkg, " ", version, " REQUIRED)"), cmakelists)
  configure_cmd <- paste0(
    "export ", env_vars,
    " && cd ", project_dir,
    " && $CMAKE ",
    " -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON",
    " -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON",
    " ."
  )
  exit_status <- system(configure_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE)
  exit_status == 0
}
#####
# Main driver. If we're working in a local checkout and have already built the
# libs (the installed api.h header is present), there is nothing to do.
libarrow_missing <- !file.exists(paste0(dst_dir, "/include/arrow/api.h"))
if (libarrow_missing) {
  src_dir <- NULL
  # (1) Look for a prebuilt binary for this version
  bin_file <- if (download_ok && binary_ok) download_binary() else NULL
  if (!is.null(bin_file)) {
    # Extract them
    dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE)
    unzip(bin_file, exdir = dst_dir)
    unlink(bin_file)
  } else if (!build_ok) {
    cat("*** Proceeding without C++ dependencies\n")
  } else {
    # (2) Find source and build it: downloaded source first, then a local checkout
    if (download_ok) {
      src_dir <- download_source()
    }
    if (is.null(src_dir)) {
      src_dir <- find_local_source()
    }
    if (is.null(src_dir)) {
      cat("*** Proceeding without C++ dependencies\n")
    } else {
      cat("*** Building C++ libraries\n")
      build_libarrow(src_dir, dst_dir)
    }
  }
}