#!/usr/bin/env sh
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This script makes sure we have our C++ dependencies in order
# so that we can compile the bindings in src/ and build the
# R package's shared library.
#
# The core logic is:
#
# * Find libarrow on the system. If it is present, make sure
# that its version is compatible with the R package.
# * If no suitable libarrow is found, download it (where allowed)
# or build it from source.
# * Determine what features this libarrow has and what other
# flags it requires, and set them in src/Makevars for use when
# compiling the bindings.
# * Run a test program to confirm that arrow headers are found
#
# It is intended to be flexible enough to account for several
# workflows, and should be expected to always result in a successful
# build as long as you haven't set environment variables that
# would limit its options.
#
# * Installing a released version from source, as from CRAN, with
# no other prior setup
# * On macOS and Linux, the nixlibs.R build script will download
# or build libarrow and dependencies
# * Installing a released version but first installing libarrow.
# It will use pkg-config and brew to search for libraries.
# * Installing a development version from source as a user.
# Any libarrow found on the system corresponding to a release will not match
# the dev version and will thus be skipped. nixlibs.R will be used to build libarrow.
# * TODO(GH-35049): Installing a dev version as an infrequent contributor.
# Currently the configure script doesn't offer much to make this easy.
# If you expect to rebuild multiple times, you should set up a dev
# environment.
# * Installing a dev version as a regular developer.
# The best way is to maintain your own cmake build and install it
# to a directory (not system) that you set as the env var
# $ARROW_HOME.
#
# For more information, see the various installation and developer vignettes.
# Library settings: the pkg-config module name, the Homebrew formula name,
# and the header that the final compile test includes.
PKG_CONFIG_NAME='arrow'
PKG_BREW_NAME='apache-arrow'
PKG_TEST_HEADER='<arrow/api.h>'

# Environment variables that steer the build. They are all logical flags and
# are folded to lower case here so later comparisons are case insensitive.

# Development mode; also makes the bundled build more verbose
ARROW_R_DEV=$(echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]')
# The bundled build compiles arrow C++ from source; forcing it means no
# libarrow already present on the system is picked up
FORCE_BUNDLED_BUILD=$(echo $FORCE_BUNDLED_BUILD | tr '[:upper:]' '[:lower:]')
# pkg-config, when present, is used to find libarrow on the system unless
# this is set to false
ARROW_USE_PKG_CONFIG=$(echo $ARROW_USE_PKG_CONFIG | tr '[:upper:]' '[:lower:]')
# Testing only: whether the bundled build may download its dependencies
TEST_OFFLINE_BUILD=$(echo $TEST_OFFLINE_BUILD | tr '[:upper:]' '[:lower:]')

# R package version (from DESCRIPTION) and the kernel name of this machine
VERSION=$(grep '^Version' DESCRIPTION | sed 's/Version: //')
UNAME=$(uname -s)

# Honor a caller-supplied pkg-config command, otherwise use the default name
PKG_CONFIG=${PKG_CONFIG:-pkg-config}

# Extra link flags for S3/GCS; only the bundled build fills these in
S3_LIBS=
GCS_LIBS=
# Development mode only: when the generator script is part of the source
# tree, run data-raw/codegen.R to render arrowExports.*
if [ "$ARROW_R_DEV" = "true" ]; then
  if [ -f "data-raw/codegen.R" ]; then
    echo "*** Generating code with data-raw/codegen.R"
    ${R_HOME}/bin/Rscript data-raw/codegen.R
  fi
fi
# Arrow requires C++17: abort right away when `R CMD config CXX17` reports
# no compiler (i.e. prints nothing).
if [ -z "$(${R_HOME}/bin/R CMD config CXX17)" ]; then
  printf '%s\n' \
    "------------------------- NOTE ---------------------------" \
    "Cannot install arrow: a C++17 compiler is required." \
    "See https://arrow.apache.org/docs/r/articles/install.html" \
    "---------------------------------------------------------"
  exit 1
fi
# GH-36969: The version of Abseil used in the bundled build won't build on
# gcc-13. As a workaround for the 13.0.0 release, explicitly set
# ARROW_WITH_GOOGLE_CLOUD_CPP to OFF (if not already set)
if [ -z "$ARROW_GCS" ]; then
  CXX17=$(${R_HOME}/bin/R CMD config CXX17)
  # Left unquoted so that a multi-word value is split into command + arguments
  CXX17_VERSION=$($CXX17 --version)
  if echo "$CXX17_VERSION" | grep -e "g++" > /dev/null ; then
    # Match a "13.x.y" version token. The dots are escaped and the token must
    # not be preceded by a digit or a dot, so that date stamps (e.g. 20130322)
    # or longer numbers are not mistaken for gcc 13. Written as a POSIX ERE
    # because "\+" is not part of POSIX basic regular expressions.
    if echo "$CXX17_VERSION" | grep -E -e '(^|[^0-9.])13\.[0-9]+\.[0-9]+' > /dev/null ; then
      echo "*** Disabling Arrow build with GCS on gcc-13."
      echo "*** Set ARROW_GCS=ON to explicitly enable."
      export ARROW_GCS="OFF"
    fi
  fi
fi
# Probe whether the configured pkg-config command actually runs. When it
# does not, also switch off ARROW_USE_PKG_CONFIG so later lookups skip it.
${PKG_CONFIG} --version >/dev/null 2>&1
case $? in
  0)
    PKG_CONFIG_AVAILABLE="true"
    ;;
  *)
    PKG_CONFIG_AVAILABLE="false"
    ARROW_USE_PKG_CONFIG="false"
    ;;
esac
## Find openssl
# Arrow's cmake process locates openssl the same way, but doing it here
# lets nixlibs.R see the result and activate S3 and GCS support for the
# source build.
# macOS ships with libressl. openssl can be installed with brew, but it is
# generally not linked; setting OPENSSL_ROOT_DIR (which cmake picks up later
# in the installation process) overrides that.
if [ -z "${OPENSSL_ROOT_DIR}" ] && brew --prefix openssl >/dev/null 2>&1; then
  OPENSSL_ROOT_DIR="$(brew --prefix openssl)"
  export OPENSSL_ROOT_DIR
  # Put brew's openssl first on the pkg-config search path, keeping any
  # entries that were already there
  PKG_CONFIG_PATH="${OPENSSL_ROOT_DIR}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
  export PKG_CONFIG_PATH
fi

# Look for openssl with pkg-config for non-brew sources (e.g. CRAN) and Linux
if [ -z "${OPENSSL_ROOT_DIR}" ] \
  && [ "${PKG_CONFIG_AVAILABLE}" = "true" ] \
  && ${PKG_CONFIG} --exists openssl; then
  OPENSSL_ROOT_DIR="$(${PKG_CONFIG} --variable=prefix openssl)"
  export OPENSSL_ROOT_DIR
fi
#############
# Functions #
#############
find_or_build_libarrow () {
  # Entry point for obtaining libarrow. Unless FORCE_BUNDLED_BUILD is "true",
  # a system library is searched for first; the bundled build runs only when
  # forced or when the search leaves _LIBARROW_FOUND set to "false".
  if [ "$FORCE_BUNDLED_BUILD" != "true" ]; then
    find_arrow
    if [ "$_LIBARROW_FOUND" != "false" ]; then
      # A suitable libarrow is already available; nothing to build
      return 0
    fi
  fi
  do_bundled_build
}
# This function looks in a few places for libarrow on the system already.
# If the found library version is not compatible with the R package,
# it won't be used.
#
# Reads:  ARROW_HOME, ARROW_USE_PKG_CONFIG, PKG_CONFIG, PKG_CONFIG_NAME,
#         PKG_CONFIG_AVAILABLE, PKG_CONFIG_PATH, R_HOME, VERSION
# Sets:   _LIBARROW_FOUND to the install prefix, or to "false" when nothing
#         usable was found; PC_LIB_VERSION; OLD_PKG_CONFIG_PATH; and exports
#         PKG_CONFIG_PATH.
find_arrow () {
  # Preserve original PKG_CONFIG_PATH. We'll add ${LIB_DIR}/pkgconfig to it if needed
  OLD_PKG_CONFIG_PATH="${PKG_CONFIG_PATH}"

  if [ "$ARROW_HOME" ] && [ -d "$ARROW_HOME" ]; then
    # 1. ARROW_HOME is a directory you've built and installed libarrow into.
    #    If the env var is set, we use it
    _LIBARROW_FOUND="${ARROW_HOME}"
    echo "*** Trying Arrow C++ in ARROW_HOME: $_LIBARROW_FOUND"
    export PKG_CONFIG_PATH="${_LIBARROW_FOUND}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
  elif [ "$ARROW_USE_PKG_CONFIG" != "false" ] && ${PKG_CONFIG} ${PKG_CONFIG_NAME}; then
    # 2. Use pkg-config to find arrow on the system
    _LIBARROW_FOUND="$(${PKG_CONFIG} --variable=prefix --silence-errors ${PKG_CONFIG_NAME})"
    echo "*** Trying Arrow C++ found by pkg-config: $_LIBARROW_FOUND"
  else
    _LIBARROW_FOUND="false"
  fi

  if [ "$_LIBARROW_FOUND" != "false" ]; then
    # We found a library, so check for version mismatch
    if [ "$PKG_CONFIG_AVAILABLE" = "true" ]; then
      PC_LIB_VERSION=$(${PKG_CONFIG} --modversion ${PKG_CONFIG_NAME})
    else
      # No pkg-config: read the version straight out of arrow.pc. The path is
      # quoted so that a prefix containing whitespace is passed to grep as
      # one file name.
      PC_LIB_VERSION=$(grep '^Version' "${_LIBARROW_FOUND}/lib/pkgconfig/arrow.pc" | sed 's/Version: //')
    fi
    # This is in an R script for convenience and testability.
    # Success means the found C++ library is ok to use.
    # Error means the versions don't line up and we shouldn't use it.
    # More specific messaging to the user is in the R script.
    # The two version arguments are left unquoted so the argument list handed
    # to check-versions.R is the same as before.
    if ! "${R_HOME}"/bin/Rscript tools/check-versions.R $VERSION $PC_LIB_VERSION 2> /dev/null; then
      _LIBARROW_FOUND="false"
    fi
  fi

  if [ "$_LIBARROW_FOUND" = "false" ]; then
    # We didn't find a suitable library, so reset the pkg-config search path
    export PKG_CONFIG_PATH="${OLD_PKG_CONFIG_PATH}"
  fi
}
do_bundled_build () {
  # Obtain libarrow through tools/nixlibs.R and record where it ended up.
  #
  # Reads:  R_HOME, VERSION, PKG_CONFIG, PKG_CONFIG_AVAILABLE, PKG_CONFIG_PATH
  # Sets:   _LIBARROW_FOUND (directory under ./libarrow, or "false"), LIB_DIR,
  #         and either PKG_CONFIG/PKG_CONFIG_PATH (pkg-config available) or
  #         S3_LIBS/GCS_LIBS (no pkg-config).
  # Exits the script with status 1 when nixlibs.R fails.
  if ! "${R_HOME}"/bin/Rscript tools/nixlibs.R $VERSION; then
    # If the nixlibs.R script failed, we can't continue
    echo "------------------------- NOTE ---------------------------"
    echo "There was an issue building the Arrow C++ libraries."
    echo "See https://arrow.apache.org/docs/r/articles/install.html"
    echo "---------------------------------------------------------"
    exit 1
  fi

  if [ -d "libarrow/arrow-$VERSION" ]; then
    _LIBARROW_FOUND="$(pwd)/libarrow/arrow-${VERSION}"
  else
    # It's possible that the version of the libarrow binary is not identical to the
    # R version, e.g. if the R build is a patch release, so find what the dir is
    # actually called. If there is more than one version present, use the one
    # with the highest version. The components are compared numerically
    # (field 1 is "arrow-<major>", so its key starts at character 7);
    # plain lexical order would rank arrow-9.x above arrow-10.x.
    _LIBARROW_FOUND="$(pwd)/libarrow/$(ls libarrow/ | grep '^arrow-' |
      sort -t. -k1.7,1n -k2,2n -k3,3n -k4,4n | tail -n 1)"
  fi

  # Handle a few special cases, using what we know about the bundled build
  # and our ability to make edits to it since we "own" it.
  LIB_DIR="${_LIBARROW_FOUND}/lib"
  if [ -d "$LIB_DIR" ]; then
    if [ "${PKG_CONFIG_AVAILABLE}" = "true" ]; then
      # Use pkg-config to do static linking of libarrow's dependencies
      export PKG_CONFIG_PATH="${LIB_DIR}/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
      # pkg-config on CentOS 7 doesn't have --define-prefix option.
      if ${PKG_CONFIG} --help | grep -- --define-prefix >/dev/null 2>&1; then
        # --define-prefix is for binary packages. Binary packages
        # use "/arrow/r/libarrow/dist" as prefix but it doesn't
        # match the extracted path. --define-prefix uses the directory
        # in which arrow.pc exists as its prefix instead of
        # "/arrow/r/libarrow/dist".
        PKG_CONFIG="${PKG_CONFIG} --define-prefix"
      else
        # Rewrite prefix= in arrow.pc on CentOS 7.
        sed \
          -i.bak \
          -e "s,prefix=/arrow/r/libarrow/dist,prefix=${LIB_DIR}/..,g" \
          "${LIB_DIR}"/pkgconfig/*.pc
        rm -f "${LIB_DIR}"/pkgconfig/*.pc.bak
      fi
    else
      # This case must be ARROW_DEPENDENCY_SOURCE=BUNDLED.
      # These would be identified by pkg-config, in Requires.private and Libs.private.
      # Rather than try to re-implement pkg-config, we can just hard-code them here.
      S3_LIBS="-lcurl -lssl -lcrypto"
      GCS_LIBS="-lcurl -lssl -lcrypto"
    fi
  else
    # If the library directory does not exist, the script must not have been successful
    _LIBARROW_FOUND="false"
  fi
}
# Once libarrow is obtained, this function sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS`
# either from pkg-config or by inferring things about the directory in $1
#
# Arguments: $1 - libarrow install prefix (only used when pkg-config is
#                 unavailable)
# Reads:     PKG_CONFIG_AVAILABLE, ARROW_R_CXXFLAGS, UNAME
set_pkg_vars () {
  # "$1" is quoted so a prefix containing whitespace reaches the helpers intact
  if [ "$PKG_CONFIG_AVAILABLE" = "true" ]; then
    set_lib_dir_with_pc
  else
    set_lib_dir_without_pc "$1"
  fi

  # Check cmake options for enabled features. This uses LIB_DIR that
  # is set by the above set_lib_dir_* call.
  add_feature_flags

  if [ "$PKG_CONFIG_AVAILABLE" = "true" ]; then
    set_pkg_vars_with_pc
  else
    set_pkg_vars_without_pc "$1"
  fi

  # Set any user-defined CXXFLAGS
  if [ "$ARROW_R_CXXFLAGS" ]; then
    PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS"
  fi

  # We use expr because the product version returns more than just 10.13 and we want to
  # match the substring. However, expr always outputs the number of matched characters
  # to stdout, to avoid noise in the log we redirect the output to /dev/null
  # NOTE(review): the pattern only matches versions that begin with "10.13",
  # while the comment below speaks of macOS < 11 - confirm which is intended.
  if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13' >/dev/null 2>&1; then
    # avoid C++17 availability warnings on macOS < 11
    PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY"
  fi
}
# With pkg-config at hand, ask it where libarrow's libraries live.
# Sets LIB_DIR to the module's `libdir` variable.
set_lib_dir_with_pc () {
  LIB_DIR=$(${PKG_CONFIG} --variable=libdir --silence-errors ${PKG_CONFIG_NAME})
}
set_pkg_vars_with_pc () {
  # Query pkg-config for libarrow plus every enabled feature module, then
  # combine the answers with the feature flags gathered earlier.
  # Sets pkg_config_names, PKG_CFLAGS, PKG_LIBS and PKG_DIRS.
  pkg_config_names="${PKG_CONFIG_NAME} ${PKG_CONFIG_NAMES_FEATURES}"

  # pkg-config's cflags go first, then pre-existing flags, then feature defines
  PKG_CFLAGS="$(${PKG_CONFIG} --cflags --silence-errors ${pkg_config_names}) $PKG_CFLAGS $PKG_CFLAGS_FEATURES"

  # -l flags and other linker options, followed by feature-specific libraries
  PKG_LIBS="$(${PKG_CONFIG} --libs-only-l --libs-only-other --silence-errors ${pkg_config_names}) $PKG_LIBS_FEATURES"

  # -L search directories are kept separately
  PKG_DIRS=$(${PKG_CONFIG} --libs-only-L --silence-errors ${pkg_config_names})
}
# Without pkg-config the library directory is inferred from the prefix:
# LIB_DIR becomes the `lib` subdirectory of $1.
set_lib_dir_without_pc () {
  LIB_DIR="${1}/lib"
}
set_pkg_vars_without_pc () {
  # Assemble PKG_CFLAGS, PKG_DIRS and PKG_LIBS by hand for the prefix in $1,
  # for use when pkg-config is not available. Relies on LIB_DIR and on the
  # feature variables having been set beforehand.

  # Compiler flags: the prefix's include dir, existing flags, feature defines
  PKG_CFLAGS="-I${1}/include ${PKG_CFLAGS} ${PKG_CFLAGS_FEATURES}"
  if grep -q "_GLIBCXX_USE_CXX11_ABI=0" "${LIB_DIR}/pkgconfig/arrow.pc"; then
    # arrow.pc mentions the old C++ ABI setting, so compile with it as well
    PKG_CFLAGS="${PKG_CFLAGS} -D_GLIBCXX_USE_CXX11_ABI=0"
  fi

  # Library search path, plus openssl's lib directory when its root is known
  PKG_DIRS="-L${LIB_DIR}"
  if [ -n "${OPENSSL_ROOT_DIR}" ]; then
    PKG_DIRS="${PKG_DIRS} -L${OPENSSL_ROOT_DIR}/lib"
  fi

  # Link order: feature libraries, libarrow, the bundled dependencies archive
  # (only when one is present under LIB_DIR), then feature-specific extras
  PKG_LIBS="${PKG_LIBS_FEATURES_WITHOUT_PC} -larrow"
  if [ -n "$(find "$LIB_DIR" -name 'libarrow_bundled_dependencies.*')" ]; then
    PKG_LIBS="${PKG_LIBS} -larrow_bundled_dependencies"
  fi
  PKG_LIBS="${PKG_LIBS} ${PKG_LIBS_FEATURES}"

  # If on Raspberry Pi, need to manually link against latomic
  # See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81358 for similar example
  # pkg-config will handle this for us automatically, see ARROW-6312
  if grep -q raspbian /etc/os-release 2>/dev/null; then
    PKG_CFLAGS="${PKG_CFLAGS} -DARROW_CXXFLAGS=-latomic"
    PKG_LIBS="${PKG_LIBS} -latomic"
  fi
}
add_feature_flags () {
  # Work out which optional features libarrow was built with and turn on the
  # matching pieces of the R bindings. The information comes from
  # ArrowOptions.cmake, which the libarrow build generates under LIB_DIR.
  #
  # Sets ARROW_OPTS_CMAKE and (re)initializes PKG_CFLAGS_FEATURES,
  # PKG_CONFIG_NAMES_FEATURES, PKG_LIBS_FEATURES and
  # PKG_LIBS_FEATURES_WITHOUT_PC.
  PKG_CFLAGS_FEATURES=""
  PKG_CONFIG_NAMES_FEATURES=""
  PKG_LIBS_FEATURES=""
  PKG_LIBS_FEATURES_WITHOUT_PC=""

  ARROW_OPTS_CMAKE="$LIB_DIR/cmake/Arrow/ArrowOptions.cmake"
  if [ ! -f "${ARROW_OPTS_CMAKE}" ]; then
    echo "*** $ARROW_OPTS_CMAKE not found; some features will not be enabled"
    return
  fi

  # Every enabled ARROW_<feature> option contributes -DARROW_R_WITH_<feature>.
  # The order of this list is significant: it fixes the order of the defines
  # and, because each library is prepended, the link order used without
  # pkg-config.
  for _arrow_feature in PARQUET DATASET ACERO SUBSTRAIT JSON S3 GCS; do
    arrow_built_with "ARROW_${_arrow_feature}" || continue
    PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_${_arrow_feature}"

    # Feature-specific extras. The component libraries are assumed to have
    # the same -L flag as arrow, so their location is not added to PKG_DIRS.
    case "$_arrow_feature" in
      PARQUET)
        PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES parquet"
        PKG_LIBS_FEATURES_WITHOUT_PC="-lparquet $PKG_LIBS_FEATURES_WITHOUT_PC"
        ;;
      DATASET)
        PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-dataset"
        PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_dataset $PKG_LIBS_FEATURES_WITHOUT_PC"
        ;;
      ACERO)
        PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-acero"
        PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_acero $PKG_LIBS_FEATURES_WITHOUT_PC"
        ;;
      SUBSTRAIT)
        PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-substrait"
        PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_substrait $PKG_LIBS_FEATURES_WITHOUT_PC"
        ;;
      S3)
        PKG_LIBS_FEATURES="$PKG_LIBS_FEATURES $S3_LIBS"
        ;;
      GCS)
        PKG_LIBS_FEATURES="$PKG_LIBS_FEATURES $GCS_LIBS"
        ;;
    esac
  done
  # Do not leave the loop variable behind in the script's global scope
  unset _arrow_feature
}
arrow_built_with() {
  # Function to check cmake options for features.
  # Arguments: $1 - cmake option name, e.g. ARROW_S3
  # Returns:   0 when $ARROW_OPTS_CMAKE contains `set(<option> "ON")`
  #            (matched case-insensitively), non-zero otherwise.
  # The file name is quoted so that a path containing whitespace is handed
  # to grep as a single argument.
  grep -i 'set('"$1"' "ON")' "$ARROW_OPTS_CMAKE" >/dev/null 2>&1
}
##############
# Main logic #
##############
# First: find a usable libarrow on the system, or build the bundled one
find_or_build_libarrow

# We should have a valid libarrow build in $_LIBARROW_FOUND
# Now set `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` based on that.
# The prefix is quoted so that it is passed on as exactly one argument.
if [ "$_LIBARROW_FOUND" != "false" ] && [ "$_LIBARROW_FOUND" != "" ]; then
  set_pkg_vars "${_LIBARROW_FOUND}"
fi

# Test that we can compile something with those flags: run the preprocessor
# (-E) over a one-line program that includes the arrow test header.
CXX17="$("${R_HOME}"/bin/R CMD config CXX17) -E"
CXX17FLAGS=$("${R_HOME}"/bin/R CMD config CXX17FLAGS)
CXX17STD=$("${R_HOME}"/bin/R CMD config CXX17STD)
CPPFLAGS=$("${R_HOME}"/bin/R CMD config CPPFLAGS)
# TEST_CMD is expanded unquoted on purpose: it is a command line, not a word
TEST_CMD="${CXX17} ${CPPFLAGS} ${PKG_CFLAGS} ${CXX17FLAGS} ${CXX17STD} -xc++ -"
if echo "#include $PKG_TEST_HEADER" | ${TEST_CMD} >/dev/null 2>&1; then
  # Prepend PKG_DIRS to PKG_LIBS and write to Makevars
  PKG_LIBS="$PKG_DIRS $PKG_LIBS"
  echo "PKG_CFLAGS=$PKG_CFLAGS"
  echo "PKG_LIBS=$PKG_LIBS"
  # NOTE(review): the values are spliced into the sed program as-is, so a
  # "|", "&" or backslash inside them would be interpreted by sed.
  sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars
  # Success
  exit 0
else
  echo "------------------------- NOTE ---------------------------"
  echo "There was an issue preparing the Arrow C++ libraries."
  echo "See https://arrow.apache.org/docs/r/articles/install.html"
  echo "---------------------------------------------------------"
  PKG_LIBS=""
  PKG_CFLAGS=""
  exit 1
fi