# blob: 0005f0fdf1923486b9821238dc3acdaeb81d8b7e
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Includes code assembled from BSD/MIT/Apache-licensed code from some 3rd-party
# projects, including Kudu, Impala, and libdynd. See python/LICENSE.txt
cmake_minimum_required(VERSION 3.16)
project(pyarrow)

# Full PyArrow version string; PYARROW_BASE_VERSION keeps only the leading
# "<major>.<minor>.<patch>" part of it.
set(PYARROW_VERSION "14.0.2")
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}")

# Helper CMake modules can live in two places, depending on where we build from:
#   - <source>/cmake_modules        when running from a Python sdist tarball
#   - <source>/../cpp/cmake_modules when running from a git source tree
# Whichever directories exist are appended to the module search path.
set(LOCAL_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/cmake_modules")
set(CPP_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/../cpp/cmake_modules")
if(EXISTS "${LOCAL_CMAKE_MODULES}")
  list(APPEND CMAKE_MODULE_PATH ${LOCAL_CMAKE_MODULES})
endif()
if(EXISTS "${CPP_CMAKE_MODULES}")
  list(APPEND CMAKE_MODULE_PATH ${CPP_CMAKE_MODULES})
endif()

# An explicitly given Arrow C++ location takes precedence over every other
# prefix when packages are searched.
if(PYARROW_CPP_HOME)
  list(INSERT CMAKE_PREFIX_PATH 0 "${PYARROW_CPP_HOME}")
endif()
include(CMakeParseArguments)

# Policies that are set unconditionally:
#   CMP0042: MACOSX_RPATH is enabled by default.
#            https://www.cmake.org/cmake/help/latest/policy/CMP0042.html
#   CMP0054: Only interpret if() arguments as variables or keywords when
#            unquoted.
#            https://www.cmake.org/cmake/help/latest/policy/CMP0054.html
cmake_policy(SET CMP0042 NEW)
cmake_policy(SET CMP0054 NEW)

# Policies that are only set when the running CMake knows about them:
#   CMP0068: RPATH settings on macOS do not affect install_name.
#            https://cmake.org/cmake/help/latest/policy/CMP0068.html
#   CMP0074: find_package() uses <PackageName>_ROOT variables.
#            https://cmake.org/cmake/help/latest/policy/CMP0074.html
#   CMP0095: RPATH entries are properly escaped in the intermediary CMake
#            install script.
#            https://cmake.org/cmake/help/latest/policy/CMP0095.html
foreach(pyarrow_policy CMP0068 CMP0074 CMP0095)
  if(POLICY ${pyarrow_policy})
    cmake_policy(SET ${pyarrow_policy} NEW)
  endif()
endforeach()
# Hints for locating Python 3:
#   - use the first Python installation on PATH, not the newest one
#   - on Windows, consult the registry last, not first
#   - on macOS, consult frameworks last, not first
set(Python3_FIND_STRATEGY "LOCATION")
set(Python3_FIND_REGISTRY "LAST")
set(Python3_FIND_FRAMEWORK "LAST")

# Allow "make install" to not depend on all targets.
# This must be declared in the top-level CMakeLists.txt.
set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY true)

# macOS: honour MACOSX_DEPLOYMENT_TARGET from the environment when it is
# defined, otherwise fall back to 10.14.
set(CMAKE_MACOSX_RPATH 1)
if(NOT DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
  set(CMAKE_OSX_DEPLOYMENT_TARGET 10.14)
else()
  set(CMAKE_OSX_DEPLOYMENT_TARGET $ENV{MACOSX_DEPLOYMENT_TARGET})
endif()

# Exporting CMAKE_EXPORT_COMPILE_COMMANDS=1 in the environment generates a
# Clang compile_commands.json "compilation database" file for use with various
# development tools, such as Vim's YouCompleteMe plugin.
# See http://clang.llvm.org/docs/JSONCompilationDatabase.html
if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
  set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()

# In the event that we are bundling the shared libraries (e.g. in a
# manylinux1 wheel), the RPATH of the extensions has to point at the root of
# the pyarrow/ package so that libarrow can be loaded properly.
if(UNIX)
  set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
  if(NOT APPLE)
    set(CMAKE_INSTALL_RPATH "\$ORIGIN")
  else()
    set(CMAKE_INSTALL_NAME_DIR "@rpath")
    set(CMAKE_INSTALL_RPATH "@loader_path/")
  endif()
endif()
# User-facing build options.
#
# They are only declared when this file is the top-level CMakeLists.txt (i.e.
# the project is not being pulled in from a parent build). Every PYARROW_*
# switch tested elsewhere in this file is declared here so that it shows up in
# the cache (cmake-gui / ccmake) with a documented OFF default. Values already
# supplied by the caller (e.g. -DPYARROW_BUILD_S3=ON) are left untouched by
# option().
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
  option(PYARROW_BUILD_ACERO "Build the PyArrow Acero integration" OFF)
  option(PYARROW_BUILD_CUDA "Build the PyArrow CUDA support" OFF)
  option(PYARROW_BUILD_DATASET "Build the PyArrow Dataset integration" OFF)
  option(PYARROW_BUILD_FLIGHT "Build the PyArrow Flight integration" OFF)
  option(PYARROW_BUILD_GANDIVA "Build the PyArrow Gandiva integration" OFF)
  option(PYARROW_BUILD_GCS "Build the PyArrow GCS integration" OFF)
  option(PYARROW_BUILD_HDFS "Build the PyArrow HDFS integration" OFF)
  option(PYARROW_BUILD_ORC "Build the PyArrow ORC integration" OFF)
  option(PYARROW_BUILD_PARQUET "Build the PyArrow Parquet integration" OFF)
  option(PYARROW_BUILD_PARQUET_ENCRYPTION
         "Build the PyArrow Parquet encryption integration" OFF)
  option(PYARROW_BUILD_S3 "Build the PyArrow S3 integration" OFF)
  option(PYARROW_BUILD_SUBSTRAIT "Build the PyArrow Substrait integration" OFF)
  option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF)
  option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF)
  option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF)
  # Free-form extra flags; appended to CMAKE_CXX_FLAGS further down.
  set(PYARROW_CXXFLAGS
      ""
      CACHE STRING "Compiler flags to append when compiling Arrow")
endif()
# Route C and C++ compilation through ccache when it is installed, unless a
# compiler launcher has already been chosen for either language.
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND AND NOT (CMAKE_C_COMPILER_LAUNCHER OR CMAKE_CXX_COMPILER_LAUNCHER))
  message(STATUS "Using ccache: ${CCACHE_FOUND}")
  set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND})
  set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND})
  # ARROW-3985: let ccache preserve C++ comments, because some of them may be
  # meaningful to the compiler
  set(ENV{CCACHE_COMMENTS} "1")
endif()
#
# Compiler flags
#
include(BuildUtils)

# Cython generated code emits way too many warnings at the CHECKIN and
# EVERYTHING levels, so stick to PRODUCTION.
set(BUILD_WARNING_LEVEL "PRODUCTION")

# SIMD levels. These defaults must be synchronized with the definitions in
# cpp/cmake_modules/DefineOptions.cmake.
if(NOT DEFINED ARROW_SIMD_LEVEL)
  set(ARROW_SIMD_LEVEL
      "DEFAULT"
      CACHE STRING "Compile time SIMD optimization level")
endif()
if(NOT DEFINED ARROW_RUNTIME_SIMD_LEVEL)
  set(ARROW_RUNTIME_SIMD_LEVEL
      "MAX"
      CACHE STRING "Max runtime SIMD optimization level")
endif()

include(SetupCxxFlags)

# Final order: common flags first, then the pre-existing flags, then the
# user-supplied PYARROW_CXXFLAGS.
set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}")
# Silence compiler warnings triggered by Cython-generated code.
if(MSVC)
  string(APPEND
         CMAKE_CXX_FLAGS
         # MSVC version of -Wno-return-type-c-linkage
         " /wd4190"
         # Cython generates some bitshift expressions that MSVC does not like in
         # __Pyx_PyFloat_DivideObjC
         " /wd4293"
         # Converting to/from C++ bool is pretty wonky in Cython. The C4800
         # warning seems harmless, and probably not worth the effort of working
         # around it
         " /wd4800"
         # See https://github.com/cython/cython/issues/2731. Change introduced
         # in Cython 0.29.1 causes "unsafe use of type 'bool' in operation"
         " /wd4804"
         # See https://github.com/cython/cython/issues/4445.
         #
         # Cython 3 emits "(void)__Pyx_PyObject_CallMethod0;" to suppress a
         # "unused function" warning but the code emits another "function
         # call missing argument list" warning.
         " /wd4551")
else()
  string(APPEND
         CMAKE_CXX_FLAGS
         # Enable perf and other tools to work properly
         " -fno-omit-frame-pointer"
         # Suppress Cython warnings
         " -Wno-unused-variable -Wno-maybe-uninitialized")
  if(CMAKE_CXX_COMPILER_ID MATCHES "^(AppleClang|Clang)$")
    string(APPEND
           CMAKE_CXX_FLAGS
           # Cython warnings in clang
           " -Wno-parentheses-equality"
           " -Wno-constant-logical-operand"
           " -Wno-missing-declarations"
           " -Wno-sometimes-uninitialized"
           # We have public Cython APIs which return C++ types, which are in an
           # extern "C" block (no symbol mangling) and clang doesn't like this
           " -Wno-return-type-c-linkage")
  endif()
endif()

# For any C code, use the same flags. This snapshot is taken before the
# C++-only flags (like -std=c++17) are prepended to CMAKE_CXX_FLAGS.
set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CXX_ONLY_FLAGS} ${CMAKE_CXX_FLAGS}")

message(STATUS "CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
# Name of the per-build-type output subdirectory.
if(MSVC)
  # MSVC makes its own output directories based on the build configuration
  set(BUILD_SUBDIR_NAME "")
else()
  # Lower-cased build type. The expansion is quoted so that an empty
  # CMAKE_BUILD_TYPE yields an empty name instead of a string(TOLOWER)
  # argument-count error.
  string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_SUBDIR_NAME)
endif()

# If build in-source, create the latest symlink. If build out-of-source, which
# is preferred, simply output the binaries in the build folder.
# Both paths are quoted so they are always compared as plain strings.
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
  set(BUILD_OUTPUT_ROOT_DIRECTORY
      "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}")
  # Link build/latest to the current build directory, to avoid developers
  # accidentally running the latest debug build when in fact they're building
  # release builds.
  file(MAKE_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
  # "-T" is passed to ln everywhere except on Apple platforms.
  if(NOT APPLE)
    set(MORE_ARGS "-T")
  endif()
  execute_process(COMMAND ln ${MORE_ARGS} -sf "${BUILD_OUTPUT_ROOT_DIRECTORY}"
                          "${CMAKE_CURRENT_BINARY_DIR}/build/latest")
else()
  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}")
endif()

message(STATUS "Generator: ${CMAKE_GENERATOR}")
message(STATUS "Build output directory: ${BUILD_OUTPUT_ROOT_DIRECTORY}")

# where to put generated archives (.a files)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
set(ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")

# where to put generated libraries (.so files)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")

# where to put generated binaries
set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
# Python and Numpy libraries, plus the Cython helpers
find_package(Python3Alt REQUIRED)
include(UseCython)

# PyArrow C++: the arrow_python library is built from the sources under
# pyarrow/src/arrow/python and needs an Arrow C++ installation.
include(GNUInstallDirs)
find_package(Arrow REQUIRED)

set(PYARROW_CPP_ROOT_DIR pyarrow/src)
set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python)

# Link libraries for arrow_python; filled in by the option checks that follow.
set(PYARROW_CPP_LINK_LIBS "")

# Sources that are always compiled. Optional sources are appended later,
# depending on the enabled features.
set(PYARROW_CPP_SRCS
    ${PYARROW_CPP_SOURCE_DIR}/arrow_to_pandas.cc
    ${PYARROW_CPP_SOURCE_DIR}/benchmark.cc
    ${PYARROW_CPP_SOURCE_DIR}/common.cc
    ${PYARROW_CPP_SOURCE_DIR}/datetime.cc
    ${PYARROW_CPP_SOURCE_DIR}/decimal.cc
    ${PYARROW_CPP_SOURCE_DIR}/deserialize.cc
    ${PYARROW_CPP_SOURCE_DIR}/extension_type.cc
    ${PYARROW_CPP_SOURCE_DIR}/gdb.cc
    ${PYARROW_CPP_SOURCE_DIR}/helpers.cc
    ${PYARROW_CPP_SOURCE_DIR}/inference.cc
    ${PYARROW_CPP_SOURCE_DIR}/init.cc
    ${PYARROW_CPP_SOURCE_DIR}/io.cc
    ${PYARROW_CPP_SOURCE_DIR}/ipc.cc
    ${PYARROW_CPP_SOURCE_DIR}/numpy_convert.cc
    ${PYARROW_CPP_SOURCE_DIR}/numpy_to_arrow.cc
    ${PYARROW_CPP_SOURCE_DIR}/python_test.cc
    ${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc
    ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
    ${PYARROW_CPP_SOURCE_DIR}/serialize.cc
    ${PYARROW_CPP_SOURCE_DIR}/udf.cc)

# init.cc is kept out of precompiled-header and unity builds.
set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/init.cc
                            PROPERTIES SKIP_PRECOMPILE_HEADERS ON
                                       SKIP_UNITY_BUILD_INCLUSION ON)
#
# Arrow vs PyArrow C++ options
#
# Every PyArrow feature that is switched on requires the matching Arrow C++
# feature; a mismatch aborts the configuration. Components that arrow_python
# itself links against are collected in PYARROW_CPP_LINK_LIBS.

# Dataset
if(PYARROW_BUILD_DATASET)
  if(NOT ARROW_DATASET)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_DATASET=ON")
  endif()
  find_package(ArrowDataset REQUIRED)
  if(NOT ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowDataset::arrow_dataset_static)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowDataset::arrow_dataset_shared)
  endif()
endif()

# Acero
if(PYARROW_BUILD_ACERO)
  if(NOT ARROW_ACERO)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_ACERO=ON")
  endif()
  find_package(ArrowAcero REQUIRED)
  if(NOT ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowAcero::arrow_acero_static)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowAcero::arrow_acero_shared)
  endif()
endif()

# Parquet (also needed when only the encryption integration is requested)
if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION)
  if(NOT ARROW_PARQUET)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_PARQUET=ON")
  endif()
  find_package(Parquet REQUIRED)
endif()

# Parquet encryption adds one more source file and links arrow_python against
# Parquet.
if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION)
  message(STATUS "Parquet Encryption is NOT Enabled")
elseif(NOT PARQUET_REQUIRE_ENCRYPTION)
  message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
else()
  list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
  if(NOT ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_static)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_shared)
  endif()
  message(STATUS "Parquet Encryption Enabled")
endif()

# HDFS
if(PYARROW_BUILD_HDFS AND NOT ARROW_HDFS)
  message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
endif()
# Sources that depend only on how Arrow C++ itself was configured
if(ARROW_CSV)
  list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/csv.cc)
endif()
if(ARROW_FILESYSTEM)
  list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/filesystem.cc)
endif()

# pyarrow.cc is compiled without cast-qual warnings under Clang/AppleClang.
if(CMAKE_CXX_COMPILER_ID MATCHES "^(AppleClang|Clang)$")
  set_property(SOURCE ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
               APPEND_STRING
               PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
endif()

# If none of the optional components added a link library, link against the
# core Arrow library directly.
if(NOT PYARROW_CPP_LINK_LIBS)
  if(NOT ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS Arrow::arrow_static)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS Arrow::arrow_shared)
  endif()
endif()

# The arrow_python shared library. Its include path covers both the C++
# sources and the build-tree pyarrow/src directory.
add_library(arrow_python SHARED ${PYARROW_CPP_SRCS})
target_compile_definitions(arrow_python PRIVATE ARROW_PYTHON_EXPORTING)
target_include_directories(arrow_python PUBLIC ${PYARROW_CPP_ROOT_DIR}
                                               ${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.16)
  # Precompile arrow/python/pch.h, for C++ translation units only.
  target_precompile_headers(arrow_python PUBLIC
                            "$<$<COMPILE_LANGUAGE:CXX>:arrow/python/pch.h>")
endif()
target_link_libraries(arrow_python PUBLIC ${PYARROW_CPP_LINK_LIBS} Python3::NumPy)

# Installed into the root of the install tree.
install(TARGETS arrow_python
        ARCHIVE DESTINATION .
        LIBRARY DESTINATION .
        RUNTIME DESTINATION .)
# Optional arrow_python_flight shared library (Flight bindings).
set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc)
if(PYARROW_BUILD_FLIGHT)
  if(NOT ARROW_FLIGHT)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_FLIGHT=ON")
  endif()
  # Must link to shared libarrow_flight: we don't want to link more than one
  # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls
  # fail with weird errors due to multiple copies of global static state (The
  # other solution is to link gRPC shared everywhere instead of statically only
  # in Flight)
  if(NOT ARROW_BUILD_SHARED)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_BUILD_SHARED=ON")
  endif()
  find_package(ArrowFlight REQUIRED)

  add_library(arrow_python_flight SHARED ${PYARROW_CPP_FLIGHT_SRCS})
  target_compile_definitions(arrow_python_flight PRIVATE ARROW_PYFLIGHT_EXPORTING)
  target_link_libraries(arrow_python_flight PUBLIC arrow_python
                                                   ArrowFlight::arrow_flight_shared)
  install(TARGETS arrow_python_flight
          ARCHIVE DESTINATION .
          LIBRARY DESTINATION .
          RUNTIME DESTINATION .)
endif()

if(CMAKE_CXX_COMPILER_ID MATCHES "^(AppleClang|Clang)$")
  # Clang, be quiet. Python C API has lots of macros
  set_property(SOURCE ${PYARROW_CPP_SRCS} ${PYARROW_CPP_FLIGHT_SRCS}
               APPEND_STRING
               PROPERTY COMPILE_FLAGS -Wno-parentheses-equality)
endif()

# Install the arrow/python headers, leaving out the "*internal.h" ones.
install(DIRECTORY ${PYARROW_CPP_SOURCE_DIR}/
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/arrow/python
        FILES_MATCHING
        PATTERN "*internal.h" EXCLUDE
        PATTERN "*.h")
# bundle_arrow_lib(<library_path> SO_VERSION <version>)
#
# Install the shared library found at <library_path> (with symlinks resolved)
# into the root of the install tree, renaming it per platform:
#   MSVC:  <name><shared-suffix>
#   Apple: <name>.<version><shared-suffix>
#   other: <name><shared-suffix>.<version>
# <name> is the file name of <library_path> without its extensions. Arguments
# other than SO_VERSION are reported through a SEND_ERROR message.
function(bundle_arrow_lib library_path)
  cmake_parse_arguments(ARG "" "SO_VERSION" "" ${ARGN})
  if(ARG_UNPARSED_ARGUMENTS)
    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
  endif()

  get_filename_component(resolved_library ${library_path} REALPATH)
  get_filename_component(library_stem ${library_path} NAME_WE)

  # Only copy the shared library with ABI version on Linux and macOS
  if(MSVC)
    set(bundled_name "${library_stem}${CMAKE_SHARED_LIBRARY_SUFFIX}")
  elseif(APPLE)
    set(bundled_name "${library_stem}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}")
  else()
    set(bundled_name "${library_stem}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARG_SO_VERSION}")
  endif()

  install(FILES ${resolved_library}
          DESTINATION "."
          RENAME ${bundled_name})
endfunction()
# bundle_arrow_import_lib(<library_path>)
#
# Install the import library at <library_path> into the root of the install
# tree as "<name>.lib", where <name> is the file name without its extensions.
function(bundle_arrow_import_lib library_path)
  get_filename_component(import_lib_stem ${library_path} NAME_WE)
  set(import_lib_name "${import_lib_stem}.lib")
  install(FILES ${library_path}
          DESTINATION "."
          RENAME ${import_lib_name})
endfunction()
# bundle_arrow_dependency(<library_name>)
#
# Locate the shared library of a third-party dependency and install it (with
# symlinks resolved, but under the name it was found as) into the root of the
# install tree.
#
# Search root, later entries overriding earlier ones:
#   1. $ENV{CONDA_PREFIX} (with a "Library" subdirectory appended under MSVC)
#   2. $ENV{<library_name>_HOME}
# When neither is defined, find_library() uses its default search locations.
#
# Fails the configuration with FATAL_ERROR when the library cannot be found.
function(bundle_arrow_dependency library_name)
  if(MSVC)
    if(DEFINED ENV{CONDA_PREFIX})
      file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}\\Library" SHARED_LIB_HOME)
    endif()
  else()
    if(DEFINED ENV{CONDA_PREFIX})
      file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}" SHARED_LIB_HOME)
    endif()
  endif()
  if(DEFINED ENV{${library_name}_HOME})
    file(TO_CMAKE_PATH "$ENV{${library_name}_HOME}" SHARED_LIB_HOME)
  endif()

  # NOTE(review): arrow_build_shared_library_name() is not defined in this
  # file; presumably it comes from BuildUtils and yields the platform-specific
  # shared library file name -- confirm.
  arrow_build_shared_library_name(shared_lib_name "${library_name}")

  # Drop any cached result so that every call performs a fresh lookup.
  unset(SHARED_LIB_PATH CACHE)

  if(MSVC)
    # .dll isn't found by find_library with MSVC because .dll isn't included in
    # CMAKE_FIND_LIBRARY_SUFFIXES. Remember the original suffixes so they can
    # be restored once the lookup is done.
    set(CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL ${CMAKE_FIND_LIBRARY_SUFFIXES})
    list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_SHARED_LIBRARY_SUFFIX}")
  endif()

  if(SHARED_LIB_HOME)
    # An explicit home was given: look only there.
    find_library(SHARED_LIB_PATH
                 NAMES "${shared_lib_name}"
                 PATHS "${SHARED_LIB_HOME}"
                 PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES}
                 NO_DEFAULT_PATH)
  else()
    find_library(SHARED_LIB_PATH
                 NAMES "${shared_lib_name}"
                 PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES})
  endif()

  if(MSVC)
    # Restore the suffix list saved above.
    set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL})
  endif()

  if(SHARED_LIB_PATH)
    get_filename_component(SHARED_LIB_REALPATH ${SHARED_LIB_PATH} REALPATH)
    get_filename_component(SHARED_LIB_NAME ${SHARED_LIB_PATH} NAME)
    message(STATUS "Bundle dependency ${library_name}: ${SHARED_LIB_REALPATH} as ${SHARED_LIB_NAME}"
    )
    install(FILES ${SHARED_LIB_REALPATH}
            DESTINATION "."
            RENAME ${SHARED_LIB_NAME})
  else()
    message(FATAL_ERROR "Unable to bundle dependency: ${library_name}")
  endif()
endfunction()
# The Arrow C++ headers are always bundled (the include/arrow directory, with
# symlinks resolved).
get_filename_component(ARROW_INCLUDE_ARROW_DIR_REAL ${ARROW_INCLUDE_DIR}/arrow REALPATH)
install(DIRECTORY ${ARROW_INCLUDE_ARROW_DIR_REAL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# The core Arrow shared library is bundled on request only.
if(PYARROW_BUNDLE_ARROW_CPP)
  bundle_arrow_lib(${ARROW_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
endif()
if(PYARROW_BUNDLE_ARROW_CPP AND MSVC)
  # TODO(kszucs): locate msvcp140.dll in a portable fashion and bundle it
  bundle_arrow_import_lib(${ARROW_IMPORT_LIB})
endif()
#
# Cython modules
#

# Every Cython module links against arrow_python.
set(LINK_LIBS arrow_python)

# Modules that are always built
set(CYTHON_EXTENSIONS
    lib
    _compute
    _csv
    _feather
    _fs
    _hdfsio
    _json
    _pyarrow_cpp_tests)
set_source_files_properties(pyarrow/lib.pyx PROPERTIES CYTHON_API TRUE)

# Optional filesystem modules
if(PYARROW_BUILD_GCS)
  list(APPEND CYTHON_EXTENSIONS _gcsfs)
endif()
if(PYARROW_BUILD_S3)
  list(APPEND CYTHON_EXTENSIONS _s3fs)
endif()
if(PYARROW_BUILD_HDFS)
  list(APPEND CYTHON_EXTENSIONS _hdfs)
endif()
# Arrow CUDA
if(PYARROW_BUILD_CUDA)
  if(NOT ARROW_CUDA)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_CUDA=ON")
  endif()
  find_package(ArrowCUDA REQUIRED)

  if(PYARROW_BUNDLE_ARROW_CPP)
    bundle_arrow_lib(${ARROW_CUDA_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
  endif()
  if(PYARROW_BUNDLE_ARROW_CPP AND MSVC)
    bundle_arrow_import_lib(${ARROW_CUDA_IMPORT_LIB})
  endif()

  # The _cuda module is added to the build and linked against the shared CUDA
  # library further down.
  set(CUDA_LINK_LIBS ArrowCUDA::arrow_cuda_shared)
  list(APPEND CYTHON_EXTENSIONS _cuda)
  set_source_files_properties(pyarrow/_cuda.pyx PROPERTIES CYTHON_API TRUE)
endif()
# Acero
#
# The link target follows ARROW_BUILD_SHARED, the same way arrow_python picks
# its Acero library. The shared library is only bundled when Arrow C++ was
# built shared.
if(PYARROW_BUILD_ACERO)
  if(ARROW_BUILD_SHARED)
    if(PYARROW_BUNDLE_ARROW_CPP)
      bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB})
      endif()
    endif()
    set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared)
  else()
    set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_static)
  endif()
  list(APPEND CYTHON_EXTENSIONS _acero)
endif()

# Dataset
#
# Same shared/static selection as for Acero above.
if(PYARROW_BUILD_DATASET)
  if(ARROW_BUILD_SHARED)
    if(PYARROW_BUNDLE_ARROW_CPP)
      bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB})
      endif()
    endif()
    set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared)
  else()
    set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_static)
  endif()
  list(APPEND CYTHON_EXTENSIONS _dataset)
endif()
# Parquet
if(PYARROW_BUILD_PARQUET)
  # Bundling: the Parquet headers always, the shared library (plus the import
  # library under MSVC) only when Arrow C++ was built shared.
  if(PYARROW_BUNDLE_ARROW_CPP)
    get_filename_component(PARQUET_INCLUDE_PARQUET_DIR_REAL
                           ${PARQUET_INCLUDE_DIR}/parquet REALPATH)
    install(DIRECTORY ${PARQUET_INCLUDE_PARQUET_DIR_REAL}
            DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
    if(ARROW_BUILD_SHARED)
      bundle_arrow_lib(${PARQUET_SHARED_LIB} SO_VERSION ${PARQUET_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${PARQUET_IMPORT_LIB})
      endif()
    endif()
  endif()

  if(NOT ARROW_BUILD_SHARED)
    set(PARQUET_LINK_LIBS Parquet::parquet_static)
  else()
    set(PARQUET_LINK_LIBS Parquet::parquet_shared)
  endif()

  # Cython modules: the encryption and dataset variants are added on top of
  # _parquet when those features are enabled.
  list(APPEND CYTHON_EXTENSIONS _parquet)
  if(PYARROW_BUILD_PARQUET_ENCRYPTION)
    list(APPEND CYTHON_EXTENSIONS _parquet_encryption)
  endif()
  if(PYARROW_BUILD_DATASET)
    list(APPEND CYTHON_EXTENSIONS _dataset_parquet)
    if(PYARROW_BUILD_PARQUET_ENCRYPTION)
      list(APPEND CYTHON_EXTENSIONS _dataset_parquet_encryption)
    endif()
  endif()
endif()

# ORC
if(PYARROW_BUILD_ORC)
  if(NOT ARROW_ORC)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_ORC=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _orc)
  if(PYARROW_BUILD_DATASET)
    list(APPEND CYTHON_EXTENSIONS _dataset_orc)
  endif()
endif()
# Flight
#
# FLIGHT_LINK_LIBS stays empty unless the Flight integration is enabled.
set(FLIGHT_LINK_LIBS "")
if(PYARROW_BUILD_FLIGHT)
  if(PYARROW_BUNDLE_ARROW_CPP)
    bundle_arrow_lib(${ARROW_FLIGHT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
    if(MSVC)
      bundle_arrow_import_lib(${ARROW_FLIGHT_IMPORT_LIB})
      # XXX Hardcoded library names because CMake is too stupid to give us
      # the shared library paths.
      # https://gitlab.kitware.com/cmake/cmake/issues/16210
      # bundle_arrow_dependency(libcrypto-1_1-x64)
      # bundle_arrow_dependency(libssl-1_1-x64)
    endif()
  endif()
  set(FLIGHT_LINK_LIBS arrow_python_flight)
  list(APPEND CYTHON_EXTENSIONS _flight)
endif()

# Substrait
if(PYARROW_BUILD_SUBSTRAIT)
  if(NOT ARROW_SUBSTRAIT)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_SUBSTRAIT=ON")
  endif()
  find_package(ArrowSubstrait REQUIRED)

  if(PYARROW_BUNDLE_ARROW_CPP)
    bundle_arrow_lib(${ARROW_SUBSTRAIT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
  endif()
  if(PYARROW_BUNDLE_ARROW_CPP AND MSVC)
    bundle_arrow_import_lib(${ARROW_SUBSTRAIT_IMPORT_LIB})
  endif()

  set(SUBSTRAIT_LINK_LIBS ArrowSubstrait::arrow_substrait_shared)
  list(APPEND CYTHON_EXTENSIONS _substrait)
endif()

# Gandiva
if(PYARROW_BUILD_GANDIVA)
  if(NOT ARROW_GANDIVA)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_GANDIVA=ON")
  endif()
  find_package(Gandiva REQUIRED)

  # Bundling covers the Gandiva headers as well as the library.
  if(PYARROW_BUNDLE_ARROW_CPP)
    get_filename_component(GANDIVA_INCLUDE_GANDIVA_DIR_REAL
                           ${GANDIVA_INCLUDE_DIR}/gandiva REALPATH)
    install(DIRECTORY ${GANDIVA_INCLUDE_GANDIVA_DIR_REAL}
            DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
    bundle_arrow_lib(${GANDIVA_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
    if(MSVC)
      bundle_arrow_import_lib(${GANDIVA_IMPORT_LIB})
    endif()
  endif()

  set(GANDIVA_LINK_LIBS Gandiva::gandiva_shared)
  list(APPEND CYTHON_EXTENSIONS gandiva)
endif()
#
# Setup and build Cython modules
#

# Line tracing is switched on for coverage builds.
if(PYARROW_GENERATE_COVERAGE)
  set(CYTHON_FLAGS "${CYTHON_FLAGS}" "-Xlinetrace=True")
endif()

# Error on any warnings not already explicitly ignored.
set(CYTHON_FLAGS "${CYTHON_FLAGS}" "--warning-errors")

# Each entry of CYTHON_EXTENSIONS is a (possibly dotted) module path below the
# pyarrow package: a.b.c is built from pyarrow/a/b/c.pyx into a target named c.
foreach(module ${CYTHON_EXTENSIONS})
  # Path of the .pyx source
  string(REPLACE "." "/" module_root "${module}")
  set(module_SRC pyarrow/${module_root}.pyx)

  # Leaf name (the target name) and the remaining parent packages
  string(REPLACE "." ";" directories ${module})
  list(GET directories -1 module_name)
  list(REMOVE_AT directories -1)

  # Compile the module as C++
  set_source_files_properties(${module_SRC} PROPERTIES CYTHON_IS_CXX TRUE)
  cython_add_module(${module_name} ${module_name}_pyx ${module_name}_output ${module_SRC})

  # Modules inside sub-packages are written to the matching subdirectory
  if(directories)
    string(REPLACE ";" "/" module_output_directory ${directories})
    set_target_properties(${module_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
                                                    ${module_output_directory})
  endif()

  # XXX(wesm): ARROW-2326 this logic is only needed when we have Cython
  # modules in interior directories. Since all of our C extensions and
  # bundled libraries are in the same place, we can skip this part
  # list(LENGTH directories i)
  # while(${i} GREATER 0)
  #   set(module_install_rpath "${module_install_rpath}/..")
  #   math(EXPR i "${i} - 1" )
  # endwhile(${i} GREATER 0)

  if(PYARROW_GENERATE_COVERAGE)
    set_target_properties(${module_name} PROPERTIES COMPILE_DEFINITIONS
                                                    "CYTHON_TRACE=1;CYTHON_TRACE_NOGIL=1")
  endif()

  target_link_libraries(${module_name} PRIVATE ${LINK_LIBS})
  install(TARGETS ${module_name} LIBRARY DESTINATION ".")

  # Install the files listed in <module_name>_output. The generated C++
  # sources among them are skipped unless PYARROW_BUNDLE_CYTHON_CPP is on.
  foreach(output ${${module_name}_output})
    if(NOT PYARROW_BUNDLE_CYTHON_CPP AND output MATCHES "\\.${CYTHON_CXX_EXTENSION}$")
      continue()
    endif()
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${output} DESTINATION ".")
  endforeach()
endforeach()
# lib_api.h and lib.h are copied from the top of the build tree into
# <build>/pyarrow/src/arrow/python, which is on arrow_python's include path,
# and installed next to the other arrow/python headers.
set(ARROW_PYTHON_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src/arrow/python")
file(MAKE_DIRECTORY ${ARROW_PYTHON_BINARY_DIR})

# The copy runs after the lib_pyx target.
add_custom_command(OUTPUT "${ARROW_PYTHON_BINARY_DIR}/lib_api.h"
                          "${ARROW_PYTHON_BINARY_DIR}/lib.h"
                   COMMAND ${CMAKE_COMMAND} -E copy
                           "${CMAKE_CURRENT_BINARY_DIR}/lib_api.h"
                           "${CMAKE_CURRENT_BINARY_DIR}/lib.h"
                           "${ARROW_PYTHON_BINARY_DIR}/"
                   DEPENDS lib_pyx)

# arrow_python must not be built before the headers have been copied.
add_custom_target(cython_api_headers DEPENDS "${ARROW_PYTHON_BINARY_DIR}/lib_api.h"
                                             "${ARROW_PYTHON_BINARY_DIR}/lib.h")
add_dependencies(arrow_python cython_api_headers)

install(FILES "${ARROW_PYTHON_BINARY_DIR}/lib_api.h" "${ARROW_PYTHON_BINARY_DIR}/lib.h"
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/arrow/python)
# Additional link libraries
#
# Besides LINK_LIBS, each optional Cython module links against the library of
# the feature it wraps.
if(PYARROW_BUILD_ACERO)
  target_link_libraries(_acero PRIVATE ${ACERO_LINK_LIBS})
endif()

if(PYARROW_BUILD_CUDA)
  target_link_libraries(_cuda PRIVATE ${CUDA_LINK_LIBS})
endif()

# The ORC and Parquet dataset modules link against the Dataset library too.
if(PYARROW_BUILD_DATASET)
  target_link_libraries(_dataset PRIVATE ${DATASET_LINK_LIBS})
  if(PYARROW_BUILD_ORC)
    target_link_libraries(_dataset_orc PRIVATE ${DATASET_LINK_LIBS})
  endif()
  if(PYARROW_BUILD_PARQUET)
    target_link_libraries(_dataset_parquet PRIVATE ${DATASET_LINK_LIBS})
  endif()
endif()

if(PYARROW_BUILD_FLIGHT)
  target_link_libraries(_flight PRIVATE ${FLIGHT_LINK_LIBS})
endif()

if(PYARROW_BUILD_GANDIVA)
  target_link_libraries(gandiva PRIVATE ${GANDIVA_LINK_LIBS})
endif()

if(PYARROW_BUILD_PARQUET)
  target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS})
  if(PYARROW_BUILD_PARQUET_ENCRYPTION)
    target_link_libraries(_parquet_encryption PRIVATE ${PARQUET_LINK_LIBS})
  endif()
endif()

if(PYARROW_BUILD_SUBSTRAIT)
  target_link_libraries(_substrait PRIVATE ${SUBSTRAIT_LINK_LIBS})
endif()