blob: d80f21333e7927e69c69cba8c9ffd11b4f38369f [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Includes code assembled from BSD/MIT/Apache-licensed code from some 3rd-party
# projects, including Kudu, Impala, and libdynd. See python/LICENSE.txt
# Require CMake 3.25 or newer.
cmake_minimum_required(VERSION 3.25)
# Initial project declaration. The project is declared a second time below,
# once the numeric base version has been extracted from PYARROW_VERSION.
project(pyarrow)
# This is needed for 3.13 free-threading. CMake used to add Python
# include directories with `-isystem`, which led some Python-internal
# includes to resolve to the normal 3.13 headers (because -isystem includes
# are searched after system directories) instead of the 3.13 free-threading
# ones, which in turn meant that Py_GIL_DISABLED was not set.
set(CMAKE_NO_SYSTEM_FROM_IMPORTED ON)
# Full version string; it may carry a suffix such as "-SNAPSHOT".
set(PYARROW_VERSION "23.0.0-SNAPSHOT")
# Keep only the leading "<major>.<minor>.<patch>" part, because
# project(... VERSION ...) below is given the purely numeric form.
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}")
# Generate SO version and full SO version
project(pyarrow VERSION "${PYARROW_BASE_VERSION}")
# Mirror the version components computed by project() under PYARROW_* names.
set(PYARROW_VERSION_MAJOR "${pyarrow_VERSION_MAJOR}")
set(PYARROW_VERSION_MINOR "${pyarrow_VERSION_MINOR}")
set(PYARROW_VERSION_PATCH "${pyarrow_VERSION_PATCH}")
# SO version is "<major> * 100 + <minor>"; full SO version is
# "<SO version>.<patch>.0".
# Example: for 18.0.0 --> PYARROW_SO_VERSION=1800, PYARROW_FULL_SO_VERSION=1800.0.0
math(EXPR PYARROW_SO_VERSION "${PYARROW_VERSION_MAJOR} * 100 + ${PYARROW_VERSION_MINOR}")
set(PYARROW_FULL_SO_VERSION "${PYARROW_SO_VERSION}.${PYARROW_VERSION_PATCH}.0")
# Extra CMake module directories. Two layouts are probed and every directory
# that exists is appended to CMAKE_MODULE_PATH.
#
# Layout 1: Python sdist tarball (modules live next to this file).
set(LOCAL_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/cmake_modules")
if(EXISTS "${LOCAL_CMAKE_MODULES}")
  list(APPEND CMAKE_MODULE_PATH ${LOCAL_CMAKE_MODULES})
endif()

# Layout 2: git source tree (modules live in the sibling cpp/ directory).
set(CPP_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/../cpp/cmake_modules")
if(EXISTS "${CPP_CMAKE_MODULES}")
  list(APPEND CMAKE_MODULE_PATH ${CPP_CMAKE_MODULES})
endif()

# When PYARROW_CPP_HOME is given, search it before any other prefix.
if(PYARROW_CPP_HOME)
  list(INSERT CMAKE_PREFIX_PATH 0 "${PYARROW_CPP_HOME}")
endif()
include(CMakeParseArguments)

# Policies that are always switched to NEW:
#   CMP0042 - MACOSX_RPATH is enabled by default.
#             https://www.cmake.org/cmake/help/latest/policy/CMP0042.html
#   CMP0054 - Only interpret if() arguments as variables or keywords when
#             unquoted.
#             https://www.cmake.org/cmake/help/latest/policy/CMP0054.html
cmake_policy(SET CMP0042 NEW)
cmake_policy(SET CMP0054 NEW)

# Policies that are switched to NEW only when the running CMake knows them:
#   CMP0068 - RPATH settings on macOS do not affect install_name.
#             https://cmake.org/cmake/help/latest/policy/CMP0068.html
#   CMP0074 - find_package() uses <PackageName>_ROOT variables.
#             https://cmake.org/cmake/help/latest/policy/CMP0074.html
#   CMP0095 - RPATH entries are properly escaped in the intermediary CMake
#             install script.
#             https://cmake.org/cmake/help/latest/policy/CMP0095.html
foreach(policy CMP0068 CMP0074 CMP0095)
  if(POLICY ${policy})
    cmake_policy(SET ${policy} NEW)
  endif()
endforeach()

# Python discovery preferences:
#   - take the first Python installation on PATH, not the newest one
#   - on Windows, consult the registry last, not first
#   - on macOS, consult frameworks last, not first
set(Python3_FIND_STRATEGY "LOCATION")
set(Python3_FIND_REGISTRY "LAST")
set(Python3_FIND_FRAMEWORK "LAST")

# Let "make install" run without depending on all targets.
# This has to be declared in the top-level CMakeLists.txt.
set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY true)

set(CMAKE_MACOSX_RPATH 1)
# macOS deployment target: taken from the MACOSX_DEPLOYMENT_TARGET environment
# variable when it is defined, otherwise 12.0.
if(NOT DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
  set(CMAKE_OSX_DEPLOYMENT_TARGET 12.0)
else()
  set(CMAKE_OSX_DEPLOYMENT_TARGET $ENV{MACOSX_DEPLOYMENT_TARGET})
endif()

# Emit a Clang compile_commands.json "compilation database" when the
# CMAKE_EXPORT_COMPILE_COMMANDS environment variable equals "1". The file is
# consumed by development tools such as Vim's YouCompleteMe plugin.
# See http://clang.llvm.org/docs/JSONCompilationDatabase.html
if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
  set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()

if(UNIX)
  set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
  # When the shared libraries are bundled (e.g. in a manylinux1 wheel), the
  # RPATH of the extensions has to point at the root of the pyarrow/ package
  # so that libarrow can be loaded properly.
  if(NOT APPLE)
    set(CMAKE_INSTALL_RPATH "\$ORIGIN")
  else()
    set(CMAKE_INSTALL_NAME_DIR "@rpath")
    set(CMAKE_INSTALL_RPATH "@loader_path/")
  endif()
endif()
# Use ccache as the C/C++ compiler launcher when it is installed and no
# launcher has been configured for either language.
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND AND NOT (CMAKE_C_COMPILER_LAUNCHER OR CMAKE_CXX_COMPILER_LAUNCHER))
  message(STATUS "Using ccache: ${CCACHE_FOUND}")
  set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND})
  set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND})
  # ARROW-3985: have ccache keep C++ comments, since some of them may be
  # meaningful to the compiler.
  set(ENV{CCACHE_COMMENTS} "1")
endif()
#
# Compiler flags
#
include(BuildUtils)

# Cython-generated code emits far too many warnings at the CHECKIN and
# EVERYTHING levels, so build at the PRODUCTION warning level.
set(BUILD_WARNING_LEVEL "PRODUCTION")

# SIMD defaults. These must stay synchronized with the definitions in
# cpp/cmake_modules/DefineOptions.cmake.
if(NOT DEFINED ARROW_SIMD_LEVEL)
  set(ARROW_SIMD_LEVEL "DEFAULT" CACHE STRING "Compile time SIMD optimization level")
endif()
if(NOT DEFINED ARROW_RUNTIME_SIMD_LEVEL)
  set(ARROW_RUNTIME_SIMD_LEVEL "MAX" CACHE STRING "Max runtime SIMD optimization level")
endif()

include(SetupCxxFlags)
# Pyodide / Emscripten build, enabled through the PYODIDE environment variable.
if($ENV{PYODIDE})
  # These variables are needed for building PyArrow on Emscripten.
  # If they aren't set, CMake cross compiling fails for Python
  # modules (at least under Pyodide it does).
  set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE})
  set(Python3_LIBRARY $ENV{CPYTHONLIB})
  # Clears the normal variable of that name.
  # NOTE(review): the expansion below presumably resolves to a cache entry
  # supplied on the command line (-DPython3_EXECUTABLE=...) - confirm.
  set(Python3_EXECUTABLE)
  # Ask the interpreter for the NumPy version; the include directory layout
  # chosen below depends on the major version.
  execute_process(COMMAND ${Python3_EXECUTABLE} -c
                          "import numpy; print(numpy.__version__)"
                  OUTPUT_VARIABLE PYODIDE_NUMPY_VERSION
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Fail with a readable message when the probe produced no output. Without
  # this check the string(REGEX MATCH) call below would be handed too few
  # arguments and abort with an unrelated-looking error.
  if("${PYODIDE_NUMPY_VERSION}" STREQUAL "")
    message(FATAL_ERROR "Could not determine the NumPy version for the Pyodide build (Python3_EXECUTABLE='${Python3_EXECUTABLE}')"
    )
  endif()
  string(REGEX MATCH "^([0-9]+)" PYODIDE_NUMPY_MAJOR_VERSION "${PYODIDE_NUMPY_VERSION}")
  # NumPy 2 and later: headers under _core/include; earlier: core/include.
  if(PYODIDE_NUMPY_MAJOR_VERSION GREATER_EQUAL 2)
    set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/_core/include)
  else()
    set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include)
  endif()
  set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME})
  # we set the c and cxx compiler manually to bypass pywasmcross
  # which is pyodide's way of messing with C++ build parameters.
  set(CMAKE_C_COMPILER emcc)
  set(CMAKE_CXX_COMPILER em++)
endif()
# Assemble the base flags: common flags first, then whatever CMAKE_CXX_FLAGS
# already held, then the user-supplied PYARROW_CXXFLAGS.
set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}")

if(NOT MSVC)
  # -fno-omit-frame-pointer: lets perf and other tools work properly.
  # -Wno-unused-variable -Wno-maybe-uninitialized: suppress Cython warnings.
  string(APPEND CMAKE_CXX_FLAGS " -fno-omit-frame-pointer"
         " -Wno-unused-variable -Wno-maybe-uninitialized")
  if(CMAKE_CXX_COMPILER_ID MATCHES "^(AppleClang|Clang)$")
    # The first four flags silence Cython warnings under clang.
    # -Wno-return-type-c-linkage: we have public Cython APIs which return C++
    # types from inside an extern "C" block (no symbol mangling), and clang
    # does not like this.
    string(APPEND
           CMAKE_CXX_FLAGS
           " -Wno-parentheses-equality"
           " -Wno-constant-logical-operand"
           " -Wno-missing-declarations"
           " -Wno-sometimes-uninitialized"
           " -Wno-return-type-c-linkage")
  endif()
else()
  # /wd4190: MSVC version of -Wno-return-type-c-linkage.
  # /wd4293: Cython generates some bitshift expressions that MSVC does not
  #          like in __Pyx_PyFloat_DivideObjC.
  # /wd4800: converting to/from C++ bool is pretty wonky in Cython. The C4800
  #          warning seems harmless and is probably not worth working around.
  # /wd4551: see https://github.com/cython/cython/issues/4445.
  #          Cython 3 emits "(void)__Pyx_PyObject_CallMethod0;" to suppress an
  #          "unused function" warning, but that code triggers another
  #          "function call missing argument list" warning.
  string(APPEND
         CMAKE_CXX_FLAGS
         " /wd4190"
         " /wd4293"
         " /wd4800"
         " /wd4551")
endif()

# Any C code is compiled with the same flags.
set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}")

# Prepend the C++-only flags, like -std=c++17.
set(CMAKE_CXX_FLAGS "${CXX_ONLY_FLAGS} ${CMAKE_CXX_FLAGS}")

message(STATUS "CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
# Name of the per-configuration output sub-directory.
if(MSVC)
  # MSVC makes its own output directories based on the build configuration
  set(BUILD_SUBDIR_NAME "")
else()
  # Lower-cased build type. The expansion is quoted so that an empty
  # CMAKE_BUILD_TYPE yields an empty sub-directory name instead of making
  # string() fail for lack of arguments.
  string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_SUBDIR_NAME)
endif()

# If build in-source, create the latest symlink. If build out-of-source, which is
# preferred, simply output the binaries in the build folder
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
  set(BUILD_OUTPUT_ROOT_DIRECTORY
      "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}")
  # Link build/latest to the current build directory, to avoid developers
  # accidentally running the latest debug build when in fact they're building
  # release builds.
  file(MAKE_DIRECTORY ${BUILD_OUTPUT_ROOT_DIRECTORY})
  # -T is passed to ln everywhere except on Apple platforms.
  if(NOT APPLE)
    set(MORE_ARGS "-T")
  endif()
  execute_process(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY}
                          ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
else()
  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}")
endif()

message(STATUS "Generator: ${CMAKE_GENERATOR}")
message(STATUS "Build output directory: ${BUILD_OUTPUT_ROOT_DIRECTORY}")

# where to put generated archives (.a files)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
set(ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")

# where to put generated libraries (.so files)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")

# where to put generated binaries
set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
# Python and NumPy libraries.
# NOTE(review): Python3Alt is presumably a find module shipped in one of the
# cmake_modules directories added to CMAKE_MODULE_PATH - confirm.
find_package(Python3Alt REQUIRED)
message(STATUS "Found NumPy version: ${Python3_NumPy_VERSION}")
message(STATUS "NumPy include dir: ${NUMPY_INCLUDE_DIRS}")
# Cython support; CYTHON_VERSION is reported right after the include.
include(UseCython)
message(STATUS "Found Cython version: ${CYTHON_VERSION}")
# Arrow C++ and set default PyArrow build options
# GNUInstallDirs provides CMAKE_INSTALL_INCLUDEDIR, used by the install()
# rules further down in this file.
include(GNUInstallDirs)
find_package(Arrow REQUIRED)
# define_option(<name> <description> <arrow_option>)
#
# Declares the cache entry PYARROW_<name> (STRING, default "AUTO") and derives
# the ON/OFF variable PYARROW_BUILD_<name> from it:
#
#   * PYARROW_<name> is not "AUTO": its truthiness decides.
#   * PYARROW_<name> is "AUTO" and the environment variable
#     PYARROW_WITH_<name> is defined: the truthiness of that value decides.
#   * otherwise: the Arrow C++ configuration variable named by <arrow_option>
#     decides.
#
# This is a macro, so every variable it sets (including the helper
# env_variable) lands in the caller's scope.
macro(define_option name description arrow_option)
  set("PYARROW_${name}"
      "AUTO"
      CACHE STRING ${description})
  if(NOT "${PYARROW_${name}}" STREQUAL "AUTO")
    # Explicit request from the user.
    if("${PYARROW_${name}}")
      set("PYARROW_BUILD_${name}" ON)
    else()
      set("PYARROW_BUILD_${name}" OFF)
    endif()
  else()
    # Automatic mode: the environment wins over the Arrow C++ configuration.
    set(env_variable "PYARROW_WITH_${name}")
    if(NOT DEFINED ENV{${env_variable}})
      if(${arrow_option})
        set("PYARROW_BUILD_${name}" ON)
      else()
        set("PYARROW_BUILD_${name}" OFF)
      endif()
    elseif($ENV{${env_variable}})
      set("PYARROW_BUILD_${name}" ON)
    else()
      set("PYARROW_BUILD_${name}" OFF)
    endif()
  endif()
endmacro()
# Optional components. Each call declares PYARROW_<name> and computes
# PYARROW_BUILD_<name>, defaulting to the matching Arrow C++ setting.
define_option(ACERO "Build the PyArrow Acero integration" ARROW_ACERO)
define_option(CUDA "Build the PyArrow CUDA support" ARROW_CUDA)
define_option(DATASET "Build the PyArrow Dataset integration" ARROW_DATASET)
define_option(FLIGHT "Build the PyArrow Flight integration" ARROW_FLIGHT)
define_option(GANDIVA "Build the PyArrow Gandiva integration" ARROW_GANDIVA)
define_option(ORC "Build the PyArrow ORC integration" ARROW_ORC)
define_option(PARQUET "Build the PyArrow Parquet integration" ARROW_PARQUET)
define_option(PARQUET_ENCRYPTION
              "Build the PyArrow Parquet encryption integration"
              PARQUET_REQUIRE_ENCRYPTION)
define_option(SUBSTRAIT "Build the PyArrow Substrait integration" ARROW_SUBSTRAIT)
define_option(AZURE "Build the PyArrow Azure integration" ARROW_AZURE)
define_option(GCS "Build the PyArrow GCS integration" ARROW_GCS)
define_option(S3 "Build the PyArrow S3 integration" ARROW_S3)
define_option(HDFS "Build the PyArrow HDFS integration" ARROW_HDFS)

# Plain boolean switches.
option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF)
option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF)
option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF)

set(PYARROW_CXXFLAGS "" CACHE STRING "Compiler flags to append when compiling PyArrow C++")

# Module dependencies: Substrait switches Dataset on, and Dataset switches
# Acero on.
if(PYARROW_BUILD_SUBSTRAIT)
  set(PYARROW_BUILD_DATASET ON)
endif()
if(PYARROW_BUILD_DATASET)
  set(PYARROW_BUILD_ACERO ON)
endif()
# PyArrow C++
set(PYARROW_CPP_ROOT_DIR pyarrow/src)
set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python)

# Write out compile-time configuration constants.
# The expansion is quoted so that an empty CMAKE_BUILD_TYPE (for example with
# a multi-config generator) produces an empty UPPERCASE_PYBUILD_TYPE instead
# of making string() fail for lack of arguments.
string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_PYBUILD_TYPE)
configure_file("${PYARROW_CPP_SOURCE_DIR}/config_internal.h.cmake"
               "${PYARROW_CPP_SOURCE_DIR}/config_internal.h" ESCAPE_QUOTES)

# Sources of the arrow_python library. Further sources may be appended below
# depending on the Arrow C++ configuration.
set(PYARROW_CPP_SRCS
    ${PYARROW_CPP_SOURCE_DIR}/arrow_to_pandas.cc
    ${PYARROW_CPP_SOURCE_DIR}/benchmark.cc
    ${PYARROW_CPP_SOURCE_DIR}/common.cc
    ${PYARROW_CPP_SOURCE_DIR}/config.cc
    ${PYARROW_CPP_SOURCE_DIR}/datetime.cc
    ${PYARROW_CPP_SOURCE_DIR}/decimal.cc
    ${PYARROW_CPP_SOURCE_DIR}/extension_type.cc
    ${PYARROW_CPP_SOURCE_DIR}/gdb.cc
    ${PYARROW_CPP_SOURCE_DIR}/helpers.cc
    ${PYARROW_CPP_SOURCE_DIR}/inference.cc
    ${PYARROW_CPP_SOURCE_DIR}/io.cc
    ${PYARROW_CPP_SOURCE_DIR}/ipc.cc
    ${PYARROW_CPP_SOURCE_DIR}/numpy_convert.cc
    ${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
    ${PYARROW_CPP_SOURCE_DIR}/numpy_to_arrow.cc
    ${PYARROW_CPP_SOURCE_DIR}/python_test.cc
    ${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc
    ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
    ${PYARROW_CPP_SOURCE_DIR}/udf.cc
    ${PYARROW_CPP_SOURCE_DIR}/util.cc)

# numpy_init.cc is kept out of unity builds.
set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
                            PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON)

# Libraries arrow_python links against; filled in by the component checks below.
set(PYARROW_CPP_LINK_LIBS "")
#
# Arrow vs PyArrow C++ options
#
# Make sure every component requested for PyArrow was also built in Arrow C++.
#
# For static builds ("NOT ARROW_BUILD_SHARED") the order of the entries in
# PYARROW_CPP_LINK_LIBS matters: depending modules have to come before the
# modules they depend on. For example, ArrowSubstrait depends on ArrowDataset,
# so the list has to read
# "ArrowSubstrait::arrow_substrait_static;ArrowDataset::arrow_dataset_static".

# Substrait
if(PYARROW_BUILD_SUBSTRAIT)
  message(STATUS "Building PyArrow with Substrait")
  if(NOT ARROW_SUBSTRAIT)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_SUBSTRAIT=ON")
  endif()
  find_package(ArrowSubstrait REQUIRED)
  if(NOT ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowSubstrait::arrow_substrait_static)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowSubstrait::arrow_substrait_shared)
  endif()
endif()

# Dataset
if(PYARROW_BUILD_DATASET)
  message(STATUS "Building PyArrow with Dataset")
  if(NOT ARROW_DATASET)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_DATASET=ON")
  endif()
  find_package(ArrowDataset REQUIRED)
  if(NOT ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowDataset::arrow_dataset_static)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowDataset::arrow_dataset_shared)
  endif()
endif()

# Acero
if(PYARROW_BUILD_ACERO)
  message(STATUS "Building PyArrow with Acero")
  if(NOT ARROW_ACERO)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_ACERO=ON")
  endif()
  find_package(ArrowAcero REQUIRED)
  if(NOT ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowAcero::arrow_acero_static)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowAcero::arrow_acero_shared)
  endif()
endif()

# Compute is mandatory: PyArrow currently cannot be built without it.
if(ARROW_COMPUTE)
  message(STATUS "Building PyArrow with Compute")
  find_package(ArrowCompute REQUIRED)
  if(NOT ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowCompute::arrow_compute_static)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowCompute::arrow_compute_shared)
  endif()
else()
  message(FATAL_ERROR "You must build Arrow C++ with ARROW_COMPUTE=ON")
endif()

# Parquet. Encryption is meaningless without Parquet itself, so it is switched
# off (with a warning) when Parquet is disabled.
if(PYARROW_BUILD_PARQUET)
  message(STATUS "Building PyArrow with Parquet")
  if(NOT ARROW_PARQUET)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_PARQUET=ON")
  endif()
  find_package(Parquet REQUIRED)
elseif(PYARROW_BUILD_PARQUET_ENCRYPTION)
  message(WARNING "Building PyArrow with Parquet Encryption is requested, but Parquet itself is not enabled. Ignoring the Parquet Encryption setting."
  )
  set(PYARROW_BUILD_PARQUET_ENCRYPTION OFF)
endif()
# Settings that depend on Arrow C++ options only.

# CSV support is mandatory.
if(NOT ARROW_CSV)
  message(FATAL_ERROR "You must build Arrow C++ with ARROW_CSV=ON")
endif()
list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/csv.cc)

# Filesystem support is optional.
if(ARROW_FILESYSTEM)
  list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/filesystem.cc)
endif()

# Under clang, pyarrow.cc is compiled with -Wno-cast-qual.
if(CMAKE_CXX_COMPILER_ID MATCHES "^(AppleClang|Clang)$")
  set_property(SOURCE ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
               APPEND_STRING
               PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
endif()

# Fall back to the core Arrow library when no component added a link library.
if(NOT PYARROW_CPP_LINK_LIBS)
  if(NOT ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS Arrow::arrow_static)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS Arrow::arrow_shared)
  endif()
endif()
# The arrow_python shared library.
add_library(arrow_python SHARED ${PYARROW_CPP_SRCS})
target_include_directories(arrow_python
                           PUBLIC ${PYARROW_CPP_ROOT_DIR}
                                  ${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src)

# Static builds: PYARROW_CPP_LINK_LIBS must not leak into everything that
# depends on arrow_python, or libraries would be duplicated, hence PRIVATE.
# Shared builds: everything has to depend on everything because Python loads
# modules separately, hence PUBLIC.
if(NOT ARROW_BUILD_SHARED)
  target_link_libraries(arrow_python PRIVATE ${PYARROW_CPP_LINK_LIBS})
else()
  target_link_libraries(arrow_python PUBLIC ${PYARROW_CPP_LINK_LIBS})
endif()
target_link_libraries(arrow_python PUBLIC Python3::NumPy)

target_compile_definitions(arrow_python PRIVATE ARROW_PYTHON_EXPORTING)
set_target_properties(arrow_python
                      PROPERTIES VERSION "${PYARROW_FULL_SO_VERSION}"
                                 SOVERSION "${PYARROW_SO_VERSION}")

# All artifact kinds are installed into the install prefix itself.
install(TARGETS arrow_python ARCHIVE DESTINATION . LIBRARY DESTINATION . RUNTIME DESTINATION .)
# Optional arrow_python_parquet_encryption shared library.
set(PYARROW_CPP_ENCRYPTION_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION)
  message(STATUS "Parquet Encryption is NOT Enabled")
else()
  if(PARQUET_REQUIRE_ENCRYPTION)
    add_library(arrow_python_parquet_encryption SHARED ${PYARROW_CPP_ENCRYPTION_SRCS})
    target_link_libraries(arrow_python_parquet_encryption PUBLIC arrow_python
                                                                 ${PARQUET_LINK_LIBS})
    # PARQUET_LINK_LIBS is only assigned later in this file (in the Parquet
    # section of the Cython module setup), so at this point it normally expands
    # to nothing. Link the shared Parquet library explicitly so that this
    # library records its own dependency. Static builds add nothing, matching
    # how PARQUET_LINK_LIBS is left empty for them.
    if(ARROW_BUILD_SHARED AND TARGET Parquet::parquet_shared)
      target_link_libraries(arrow_python_parquet_encryption
                            PUBLIC Parquet::parquet_shared)
    endif()
    target_compile_definitions(arrow_python_parquet_encryption
                               PRIVATE ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING)
    set_target_properties(arrow_python_parquet_encryption
                          PROPERTIES VERSION "${PYARROW_FULL_SO_VERSION}"
                                     SOVERSION "${PYARROW_SO_VERSION}")
    install(TARGETS arrow_python_parquet_encryption
            ARCHIVE DESTINATION .
            LIBRARY DESTINATION .
            RUNTIME DESTINATION .)
    message(STATUS "Parquet Encryption Enabled")
  else()
    # Encryption was requested but Arrow C++ was built without it.
    message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
  endif()
endif()
# Optional arrow_python_flight shared library.
set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc)
if(PYARROW_BUILD_FLIGHT)
  message(STATUS "Building PyArrow with Flight")

  if(NOT ARROW_FLIGHT)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_FLIGHT=ON")
  endif()

  # Flight has to come from the shared libarrow_flight. Linking more than one
  # copy of gRPC into the eventual Cython shared object makes gRPC calls fail
  # with weird errors caused by multiple copies of global static state. (The
  # alternative would be linking gRPC shared everywhere instead of statically
  # only in Flight.)
  if(NOT ARROW_BUILD_SHARED)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_BUILD_SHARED=ON")
  endif()

  find_package(ArrowFlight REQUIRED)

  add_library(arrow_python_flight SHARED ${PYARROW_CPP_FLIGHT_SRCS})
  target_link_libraries(arrow_python_flight
                        PUBLIC arrow_python ArrowFlight::arrow_flight_shared)
  target_compile_definitions(arrow_python_flight PRIVATE ARROW_PYFLIGHT_EXPORTING)
  set_target_properties(arrow_python_flight
                        PROPERTIES VERSION "${PYARROW_FULL_SO_VERSION}"
                                   SOVERSION "${PYARROW_SO_VERSION}")
  install(TARGETS arrow_python_flight ARCHIVE DESTINATION . LIBRARY DESTINATION . RUNTIME DESTINATION .)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  # Clang, be quiet. Python C API has lots of macros
  #
  # APPEND_STRING concatenates without inserting a separator, so the flag
  # carries its own leading space. That keeps it from fusing with a flag
  # already present in COMPILE_FLAGS (pyarrow.cc receives -Wno-cast-qual
  # earlier in this file).
  set_property(SOURCE ${PYARROW_CPP_SRCS} ${PYARROW_CPP_FLIGHT_SRCS}
               APPEND_STRING
               PROPERTY COMPILE_FLAGS " -Wno-parentheses-equality")
endif()

# Install the public headers found in the PyArrow C++ source directory;
# headers whose name ends in "internal.h" are excluded.
install(DIRECTORY ${PYARROW_CPP_SOURCE_DIR}/
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/arrow/python
        FILES_MATCHING
        PATTERN "*internal.h" EXCLUDE
        PATTERN "*.h")
# bundle_arrow_lib(<library_path> SO_VERSION <version>)
#
# Adds an install rule that copies the shared library at <library_path>
# (symlinks resolved) into the install prefix. The installed file name is built
# from the library's base name (everything before the first "."):
#
#   MSVC:   <name><shared suffix>
#   Apple:  <name>.<version><shared suffix>
#   other:  <name><shared suffix>.<version>
#
# Any argument other than SO_VERSION <version> is reported with SEND_ERROR.
function(bundle_arrow_lib library_path)
  cmake_parse_arguments(ARG "" "SO_VERSION" "" ${ARGN})
  if(ARG_UNPARSED_ARGUMENTS)
    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
  endif()

  get_filename_component(resolved_path ${library_path} REALPATH)
  get_filename_component(base_name ${library_path} NAME_WE)

  # Only Linux and macOS carry the ABI version in the installed file name.
  if(MSVC)
    set(installed_name ${base_name}${CMAKE_SHARED_LIBRARY_SUFFIX})
  elseif(APPLE)
    set(installed_name ${base_name}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX})
  else()
    set(installed_name ${base_name}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARG_SO_VERSION})
  endif()

  install(FILES ${resolved_path}
          DESTINATION "."
          RENAME ${installed_name})
endfunction()
# bundle_arrow_import_lib(<library_path>)
#
# Adds an install rule that copies the import library at <library_path> into
# the install prefix as "<base name>.lib", where the base name is everything
# before the first "." of the file name.
function(bundle_arrow_import_lib library_path)
  get_filename_component(base_name ${library_path} NAME_WE)
  install(FILES ${library_path} DESTINATION "." RENAME ${base_name}.lib)
endfunction()
# bundle_arrow_dependency(<library_name>)
#
# Locates the shared library for <library_name> and adds an install rule that
# copies it (symlinks resolved) into the install prefix under the name it was
# found with. Fails with FATAL_ERROR when the library cannot be found.
#
# Search location, later entries overriding earlier ones:
#   1. $ENV{CONDA_PREFIX} (with "\Library" appended under MSVC), if defined
#   2. $ENV{<library_name>_HOME}, if defined
# When a location is known only that location is searched (NO_DEFAULT_PATH);
# otherwise the default find_library() search is used.
function(bundle_arrow_dependency library_name)
  if(MSVC)
    if(DEFINED ENV{CONDA_PREFIX})
      file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}\\Library" SHARED_LIB_HOME)
    endif()
  else()
    if(DEFINED ENV{CONDA_PREFIX})
      file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}" SHARED_LIB_HOME)
    endif()
  endif()
  if(DEFINED ENV{${library_name}_HOME})
    file(TO_CMAKE_PATH "$ENV{${library_name}_HOME}" SHARED_LIB_HOME)
  endif()

  arrow_build_shared_library_name(shared_lib_name "${library_name}")

  # Drop any cached result so that find_library() searches again.
  unset(SHARED_LIB_PATH CACHE)

  if(MSVC)
    # Remember the current suffix list so that it can be restored after the
    # search below.
    set(CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL ${CMAKE_FIND_LIBRARY_SUFFIXES})
    # .dll isn't found by find_library with MSVC because .dll isn't included in
    # CMAKE_FIND_LIBRARY_SUFFIXES.
    list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_SHARED_LIBRARY_SUFFIX}")
  endif()

  if(SHARED_LIB_HOME)
    find_library(SHARED_LIB_PATH
                 NAMES "${shared_lib_name}"
                 PATHS "${SHARED_LIB_HOME}"
                 PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES}
                 NO_DEFAULT_PATH)
  else()
    find_library(SHARED_LIB_PATH
                 NAMES "${shared_lib_name}"
                 PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES})
  endif()

  if(MSVC)
    # Restore the suffix list saved above. Both the saved copy and the
    # restored variable now use the CMAKE_FIND_LIBRARY_SUFFIXES name.
    set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL})
  endif()

  if(SHARED_LIB_PATH)
    get_filename_component(SHARED_LIB_REALPATH ${SHARED_LIB_PATH} REALPATH)
    get_filename_component(SHARED_LIB_NAME ${SHARED_LIB_PATH} NAME)
    message(STATUS "Bundle dependency ${library_name}: ${SHARED_LIB_REALPATH} as ${SHARED_LIB_NAME}"
    )
    install(FILES ${SHARED_LIB_REALPATH}
            DESTINATION "."
            RENAME ${SHARED_LIB_NAME})
  else()
    message(FATAL_ERROR "Unable to bundle dependency: ${library_name}")
  endif()
endfunction()
# The Arrow headers are installed unconditionally (symlinks resolved first).
get_filename_component(ARROW_INCLUDE_ARROW_DIR_REAL
                       ${ARROW_INCLUDE_DIR}/arrow REALPATH)
install(DIRECTORY ${ARROW_INCLUDE_ARROW_DIR_REAL}
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# The Arrow and Arrow Compute libraries are bundled on request; under MSVC
# their import libraries are bundled as well.
if(PYARROW_BUNDLE_ARROW_CPP)
  bundle_arrow_lib(${ARROW_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
  bundle_arrow_lib(${ARROW_COMPUTE_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
  if(MSVC)
    bundle_arrow_import_lib(${ARROW_IMPORT_LIB})
    bundle_arrow_import_lib(${ARROW_COMPUTE_IMPORT_LIB})
  endif()
endif()
#
# Cython modules
#

# Extensions that are always built; optional ones are appended below.
set(CYTHON_EXTENSIONS
    lib
    _compute
    _csv
    _feather
    _fs
    _json
    _pyarrow_cpp_tests)
set_source_files_properties(pyarrow/lib.pyx PROPERTIES CYTHON_API TRUE)

# Libraries every Cython module links against.
set(LINK_LIBS arrow_python)

# Azure filesystem
if(PYARROW_BUILD_AZURE)
  message(STATUS "Building PyArrow with Azure")
  if(NOT ARROW_AZURE)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_AZURE=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _azurefs)
endif()

# Google Cloud Storage filesystem
if(PYARROW_BUILD_GCS)
  message(STATUS "Building PyArrow with GCS")
  if(NOT ARROW_GCS)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_GCS=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _gcsfs)
endif()

# S3 filesystem
if(PYARROW_BUILD_S3)
  message(STATUS "Building PyArrow with S3")
  if(NOT ARROW_S3)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_S3=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _s3fs)
endif()

# HDFS filesystem
if(PYARROW_BUILD_HDFS)
  message(STATUS "Building PyArrow with HDFS")
  if(NOT ARROW_HDFS)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _hdfs)
endif()

# CUDA
if(PYARROW_BUILD_CUDA)
  message(STATUS "Building PyArrow with CUDA")
  if(NOT ARROW_CUDA)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_CUDA=ON")
  endif()
  find_package(ArrowCUDA REQUIRED)

  if(PYARROW_BUNDLE_ARROW_CPP)
    bundle_arrow_lib(${ARROW_CUDA_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
    if(MSVC)
      bundle_arrow_import_lib(${ARROW_CUDA_IMPORT_LIB})
    endif()
  endif()

  set(CUDA_LINK_LIBS ArrowCUDA::arrow_cuda_shared)
  list(APPEND CYTHON_EXTENSIONS _cuda)
  set_source_files_properties(pyarrow/_cuda.pyx PROPERTIES CYTHON_API TRUE)
endif()
# Acero
if(PYARROW_BUILD_ACERO)
  if(NOT ARROW_BUILD_SHARED)
    # Static build: Acero is already linked into libarrow_python.
    set(ACERO_LINK_LIBS)
  else()
    if(PYARROW_BUNDLE_ARROW_CPP)
      bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB})
      endif()
    endif()
    set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared)
  endif()
  list(APPEND CYTHON_EXTENSIONS _acero)
endif()

# Dataset
if(PYARROW_BUILD_DATASET)
  if(NOT ARROW_BUILD_SHARED)
    # Static build: Dataset is already linked into libarrow_python.
    set(DATASET_LINK_LIBS)
  else()
    if(PYARROW_BUNDLE_ARROW_CPP)
      bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB})
      endif()
    endif()
    set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared)
  endif()
  list(APPEND CYTHON_EXTENSIONS _dataset)
endif()

# Parquet
if(PYARROW_BUILD_PARQUET)
  # Bundling also ships the Parquet headers (symlinks resolved first).
  if(PYARROW_BUNDLE_ARROW_CPP)
    get_filename_component(PARQUET_INCLUDE_PARQUET_DIR_REAL
                           ${PARQUET_INCLUDE_DIR}/parquet REALPATH)
    install(DIRECTORY ${PARQUET_INCLUDE_PARQUET_DIR_REAL}
            DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
  endif()

  if(NOT ARROW_BUILD_SHARED)
    # Static build: do not link the static library, it is already in
    # arrow_python.
    set(PARQUET_LINK_LIBS)
  else()
    if(PYARROW_BUNDLE_ARROW_CPP)
      bundle_arrow_lib(${PARQUET_SHARED_LIB} SO_VERSION ${PARQUET_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${PARQUET_IMPORT_LIB})
      endif()
    endif()
    set(PARQUET_LINK_LIBS Parquet::parquet_shared)
  endif()

  list(APPEND CYTHON_EXTENSIONS _parquet)
  if(PYARROW_BUILD_PARQUET_ENCRYPTION)
    list(APPEND CYTHON_EXTENSIONS _parquet_encryption)
  endif()

  # Dataset/Parquet glue modules.
  if(PYARROW_BUILD_DATASET)
    list(APPEND CYTHON_EXTENSIONS _dataset_parquet)
    if(PYARROW_BUILD_PARQUET_ENCRYPTION)
      list(APPEND CYTHON_EXTENSIONS _dataset_parquet_encryption)
    endif()
  endif()
endif()

# ORC
if(PYARROW_BUILD_ORC)
  message(STATUS "Building PyArrow with ORC")
  if(NOT ARROW_ORC)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_ORC=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _orc)
  # Dataset/ORC glue module.
  if(PYARROW_BUILD_DATASET)
    list(APPEND CYTHON_EXTENSIONS _dataset_orc)
  endif()
endif()
# Flight
if(NOT PYARROW_BUILD_FLIGHT)
  set(FLIGHT_LINK_LIBS "")
else()
  if(PYARROW_BUNDLE_ARROW_CPP)
    bundle_arrow_lib(${ARROW_FLIGHT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
    if(MSVC)
      bundle_arrow_import_lib(${ARROW_FLIGHT_IMPORT_LIB})
      # XXX The library names below are hardcoded (and currently disabled)
      # because CMake does not give us the shared library paths.
      # https://gitlab.kitware.com/cmake/cmake/issues/16210
      # bundle_arrow_dependency(libcrypto-1_1-x64)
      # bundle_arrow_dependency(libssl-1_1-x64)
    endif()
  endif()
  set(FLIGHT_LINK_LIBS arrow_python_flight)
  list(APPEND CYTHON_EXTENSIONS _flight)
endif()

# Substrait
if(PYARROW_BUILD_SUBSTRAIT)
  message(STATUS "Building PyArrow with Substrait")
  if(NOT ARROW_BUILD_SHARED)
    # Static build: Arrow Substrait is already linked into libarrow_python.
    set(SUBSTRAIT_LINK_LIBS)
  else()
    if(PYARROW_BUNDLE_ARROW_CPP)
      bundle_arrow_lib(${ARROW_SUBSTRAIT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${ARROW_SUBSTRAIT_IMPORT_LIB})
      endif()
    endif()
    set(SUBSTRAIT_LINK_LIBS ArrowSubstrait::arrow_substrait_shared)
  endif()
  list(APPEND CYTHON_EXTENSIONS _substrait)
endif()

# Gandiva
if(PYARROW_BUILD_GANDIVA)
  message(STATUS "Building PyArrow with Gandiva")
  if(NOT ARROW_GANDIVA)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_GANDIVA=ON")
  endif()
  find_package(Gandiva REQUIRED)

  # Bundling ships the Gandiva headers (symlinks resolved first) and library.
  if(PYARROW_BUNDLE_ARROW_CPP)
    get_filename_component(GANDIVA_INCLUDE_GANDIVA_DIR_REAL
                           ${GANDIVA_INCLUDE_DIR}/gandiva REALPATH)
    install(DIRECTORY ${GANDIVA_INCLUDE_GANDIVA_DIR_REAL}
            DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
    bundle_arrow_lib(${GANDIVA_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
    if(MSVC)
      bundle_arrow_import_lib(${GANDIVA_IMPORT_LIB})
    endif()
  endif()

  set(GANDIVA_LINK_LIBS Gandiva::gandiva_shared)
  list(APPEND CYTHON_EXTENSIONS gandiva)
endif()
#
# Setup and build Cython modules
#

# Coverage builds turn on Cython line tracing.
if(PYARROW_GENERATE_COVERAGE)
  set(CYTHON_FLAGS "${CYTHON_FLAGS}" "-Xlinetrace=True")
endif()

# --warning-errors: error on any warnings not already explicitly ignored.
# --no-c-in-traceback: GH-40236, make the generated C++ code easier to compile
#   by disabling an undocumented Cython feature.
set(CYTHON_FLAGS "${CYTHON_FLAGS}" "--warning-errors" "--no-c-in-traceback")

# Declare free-threading compatibility from Cython 3.1.0a0 onwards.
if(CYTHON_VERSION VERSION_GREATER_EQUAL "3.1.0a0")
  list(APPEND CYTHON_FLAGS "-Xfreethreading_compatible=True")
endif()

# One extension module per entry. An entry may be a dotted path: the last
# component is the module name, the leading components are its directories.
foreach(module ${CYTHON_EXTENSIONS})
  string(REPLACE "." ";" directories ${module})
  list(GET directories -1 module_name)
  list(REMOVE_AT directories -1)

  string(REPLACE "." "/" module_root "${module}")
  set(module_SRC pyarrow/${module_root}.pyx)
  set_source_files_properties(${module_SRC} PROPERTIES CYTHON_IS_CXX TRUE)

  cython_add_module(${module_name} ${module_name}_pyx ${module_name}_output
                    ${module_SRC})

  # Modules in interior directories are emitted into the matching directory.
  if(directories)
    string(REPLACE ";" "/" module_output_directory ${directories})
    set_target_properties(${module_name}
                          PROPERTIES LIBRARY_OUTPUT_DIRECTORY
                                     ${module_output_directory})
  endif()

  # XXX(wesm): ARROW-2326 this logic is only needed when we have Cython
  # modules in interior directories. Since all of our C extensions and
  # bundled libraries are in the same place, we can skip this part
  # list(LENGTH directories i)
  # while(${i} GREATER 0)
  #   set(module_install_rpath "${module_install_rpath}/..")
  #   math(EXPR i "${i} - 1" )
  # endwhile(${i} GREATER 0)

  if(PYARROW_GENERATE_COVERAGE)
    set_target_properties(${module_name}
                          PROPERTIES COMPILE_DEFINITIONS
                                     "CYTHON_TRACE=1;CYTHON_TRACE_NOGIL=1")
  endif()

  target_link_libraries(${module_name} PRIVATE ${LINK_LIBS})
  install(TARGETS ${module_name} LIBRARY DESTINATION ".")

  # Install the files generated for the module. Generated C++ sources are
  # skipped unless PYARROW_BUNDLE_CYTHON_CPP is enabled.
  foreach(output ${${module_name}_output})
    if(output MATCHES "\\.${CYTHON_CXX_EXTENSION}$" AND NOT PYARROW_BUNDLE_CYTHON_CPP)
      continue()
    endif()
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${output} DESTINATION ".")
  endforeach()
endforeach()
# Copy the lib_api.h and lib.h headers produced alongside the lib Cython
# module into the binary-tree arrow/python directory, make arrow_python wait
# for that copy, and install both headers.
set(ARROW_PYTHON_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src/arrow/python")
file(MAKE_DIRECTORY ${ARROW_PYTHON_BINARY_DIR})

add_custom_command(OUTPUT "${ARROW_PYTHON_BINARY_DIR}/lib_api.h"
                          "${ARROW_PYTHON_BINARY_DIR}/lib.h"
                   COMMAND ${CMAKE_COMMAND} -E copy
                           "${CMAKE_CURRENT_BINARY_DIR}/lib_api.h"
                           "${CMAKE_CURRENT_BINARY_DIR}/lib.h"
                           "${ARROW_PYTHON_BINARY_DIR}/"
                   DEPENDS lib_pyx)

add_custom_target(cython_api_headers
                  DEPENDS "${ARROW_PYTHON_BINARY_DIR}/lib_api.h"
                          "${ARROW_PYTHON_BINARY_DIR}/lib.h")
add_dependencies(arrow_python cython_api_headers)

install(FILES "${ARROW_PYTHON_BINARY_DIR}/lib_api.h"
              "${ARROW_PYTHON_BINARY_DIR}/lib.h"
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/arrow/python)
# Per-module link libraries, added on top of LINK_LIBS for the optional
# components that are enabled.
if(PYARROW_BUILD_CUDA)
  target_link_libraries(_cuda PRIVATE ${CUDA_LINK_LIBS})
endif()

if(PYARROW_BUILD_FLIGHT)
  target_link_libraries(_flight PRIVATE ${FLIGHT_LINK_LIBS})
endif()

if(PYARROW_BUILD_SUBSTRAIT)
  target_link_libraries(_substrait PRIVATE ${SUBSTRAIT_LINK_LIBS})
endif()

if(PYARROW_BUILD_ACERO)
  target_link_libraries(_acero PRIVATE ${ACERO_LINK_LIBS})
endif()

# Dataset, plus its ORC and Parquet glue modules.
if(PYARROW_BUILD_DATASET)
  target_link_libraries(_dataset PRIVATE ${DATASET_LINK_LIBS})
endif()
if(PYARROW_BUILD_DATASET AND PYARROW_BUILD_ORC)
  target_link_libraries(_dataset_orc PRIVATE ${DATASET_LINK_LIBS})
endif()
if(PYARROW_BUILD_DATASET AND PYARROW_BUILD_PARQUET)
  target_link_libraries(_dataset_parquet PRIVATE ${DATASET_LINK_LIBS})
endif()

if(PYARROW_BUILD_GANDIVA)
  target_link_libraries(gandiva PRIVATE ${GANDIVA_LINK_LIBS})
endif()

# Parquet, plus its encryption module.
if(PYARROW_BUILD_PARQUET)
  target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS})
endif()
if(PYARROW_BUILD_PARQUET AND PYARROW_BUILD_PARQUET_ENCRYPTION)
  target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption)
endif()