# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Development containers for Arrow
version: '3.5'
# TODO(kszucs): set arrow's mount to :ro mode, once all of the builds are
# passing without write access to the source directory. The following builds
# are contaminating the source directory:
# - docs
# - python-alpine (writes .egg directory)
# - rust (writes Cargo.lock)
# - java (without the rsync trick)
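#
# The x-* top-level keys below are Compose extension fields: docker-compose
# ignores them, so they exist only to define YAML anchors (&name) that the
# services reuse via aliases (*name). A minimal sketch of the pattern (the
# names here are illustrative):
#
#   x-common-volumes:
#     &common-volumes
#     - .:/arrow:delegated
#
#   services:
#     example:
#       volumes: *common-volumes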
x-ubuntu-volumes:
  &ubuntu-volumes
  - .:/arrow:delegated
  - ubuntu-cache:/build:delegated
x-ubuntu-trusty-volumes:
  &ubuntu-trusty-volumes
  - .:/arrow:delegated
  - ubuntu-trusty-cache:/build:delegated
x-ubuntu-xenial-volumes:
  &ubuntu-xenial-volumes
  - .:/arrow:delegated
  - ubuntu-xenial-cache:/build:delegated
x-ubuntu-bionic-volumes:
  &ubuntu-bionic-volumes
  - .:/arrow:delegated
  - ubuntu-bionic-cache:/build:delegated
x-alpine-volumes:
  &alpine-volumes
  - .:/arrow:delegated
  - alpine-cache:/build:delegated
x-fedora-volumes:
  &fedora-volumes
  - .:/arrow:delegated
  - fedora-cache:/build:delegated
x-debian-testing-volumes:
  &debian-testing-volumes
  - .:/arrow:delegated
  - debian-testing-cache:/build:delegated
volumes:
  ubuntu-cache:
  ubuntu-trusty-cache:
  ubuntu-xenial-cache:
  ubuntu-bionic-cache:
  alpine-cache:
  fedora-cache:
  debian-testing-cache:
  maven-cache:
  spark-cache:
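# The named volumes above persist build artifacts and package caches between
# container runs. To rebuild from a clean slate, they can be removed, e.g.:
#
#   docker-compose down --volumes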
services:
  ######################### Helper Containers #################################
  build-context:
    # The performance of the docker containers often depends on the size of the
    # build context. This is a helper container that adds the full build
    # context into the /context folder.
    #
    # One can inspect the folder and add files that should not be in the
    # context to .dockerignore.
    #
    # Usage:
    #   docker-compose build build-context
    #   docker-compose run build-context
    image: arrow:build-context
    build:
      context: .
      dockerfile: dev/Dockerfile.context
  ######################### Language Containers ###############################
  c_glib:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose build c_glib
    #   docker-compose run c_glib
    image: arrow:c_glib
    build:
      context: .
      dockerfile: c_glib/Dockerfile
    volumes: *ubuntu-volumes
  cpp:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose run cpp
    image: arrow:cpp
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile
    volumes: *ubuntu-volumes
  cpp-system-deps:
    # Usage:
    #   docker-compose build cpp-system-deps
    #   docker-compose run cpp-system-deps
    image: ursalab/arrow:cpp-system-deps
    shm_size: 2G
    cap_add:
      # LeakSanitizer and gdb require ptrace(2)
      - SYS_PTRACE
    build:
      context: .
      dockerfile: cpp/Dockerfile.ubuntu-bionic
      args:
        LLVM_VERSION: 7
    environment:
      ARROW_FLIGHT: "ON"
      ARROW_USE_ASAN: "ON"
      ARROW_USE_UBSAN: "ON"
    volumes:
      - .:/arrow:delegated
      - ubuntu-cache:/build:delegated
      - ${HOME}/.ccache:/build/ccache:cached
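      # The ${HOME}/.ccache bind mount above shares the host's ccache with the
      # build, so compilation results persist across container runs.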
  cpp-release:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose run cpp-release
    image: arrow:cpp
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile
    environment:
      ARROW_BUILD_TYPE: release
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *ubuntu-volumes
  cpp-static-only:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose run cpp-static-only
    image: arrow:cpp
    shm_size: 2G
    environment:
      ARROW_BUILD_SHARED: "OFF"
      ARROW_TEST_LINKAGE: "static"
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *ubuntu-volumes
  cpp-shared-only:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose run cpp-shared-only
    image: arrow:cpp
    shm_size: 2G
    environment:
      ARROW_BUILD_STATIC: "OFF"
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *ubuntu-volumes
  cpp-cmake32:
    # Usage:
    #   docker-compose build cpp-cmake32
    #   docker-compose run cpp-cmake32
    image: arrow:cpp-cmake32
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile.cmake
      args:
        CMAKE_VERSION: 3.2.3
    environment:
      # Vendor boost to avoid dealing with stale FindBoost.
      ARROW_BOOST_SOURCE: "BUNDLED"
      ARROW_ORC: "OFF"
      # uriparser requires cmake 3.3
      ARROW_FLIGHT: "OFF"
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *ubuntu-volumes
  cpp-alpine:
    # Usage:
    #   docker-compose build cpp-alpine
    #   docker-compose run cpp-alpine
    image: arrow:cpp-alpine
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile.alpine
    environment:
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *alpine-volumes
  cpp-fedora:
    # Usage:
    #   docker-compose build cpp-fedora
    #   docker-compose run cpp-fedora
    image: arrow:cpp-fedora
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile.fedora
    environment:
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *fedora-volumes
  cpp-debian-testing:
    # Usage:
    #   docker-compose build cpp-debian-testing
    #   docker-compose run cpp-debian-testing
    image: arrow:cpp-debian-testing
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile.debian-testing
    environment:
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *debian-testing-volumes
  cpp-ubuntu-trusty:
    # Usage:
    #   docker-compose build cpp-ubuntu-trusty
    #   docker-compose run cpp-ubuntu-trusty
    image: arrow:cpp-ubuntu-trusty
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile.ubuntu-trusty
    environment:
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *ubuntu-trusty-volumes
  cpp-ubuntu-xenial:
    # Usage:
    #   docker-compose build cpp-ubuntu-xenial
    #   docker-compose run cpp-ubuntu-xenial
    image: arrow:cpp-ubuntu-xenial
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile.ubuntu-xenial
    environment:
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *ubuntu-xenial-volumes
  cpp-minimum-boost-version:
    # Ubuntu Xenial 16.04 has Boost 1.58, our minimum version
    #
    # Usage:
    #   docker-compose build cpp-minimum-boost-version
    #   docker-compose run cpp-minimum-boost-version
    image: arrow:cpp-ubuntu-xenial
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile.ubuntu-xenial
    environment:
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
      ARROW_BOOST_SOURCE: "SYSTEM"
      ARROW_FLIGHT: "OFF"
    volumes: *ubuntu-xenial-volumes
  cpp-ubuntu-bionic:
    # Usage:
    #   docker-compose build cpp-ubuntu-bionic
    #   docker-compose run cpp-ubuntu-bionic
    image: arrow:cpp-ubuntu-bionic
    shm_size: 2G
    build:
      context: .
      dockerfile: cpp/Dockerfile.ubuntu-bionic
    environment:
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *ubuntu-bionic-volumes
  go:
    # Usage:
    #   docker-compose build go
    #   docker-compose run go
    image: arrow:go
    build:
      context: .
      dockerfile: go/Dockerfile
    volumes: *ubuntu-volumes
  java:
    # Usage:
    #   docker-compose build java
    #   docker-compose run java
    image: arrow:java
    build:
      context: .
      dockerfile: java/Dockerfile
    volumes:
      - .:/arrow:ro  # ensures that docker won't contaminate the host directory
      - maven-cache:/root/.m2:delegated
  java-all-jdks:
    # Usage:
    #   docker-compose build java-all-jdks
    #   docker-compose run java-all-jdks
    image: ursalab/arrow-ci-java-all-jdks:latest
    build:
      context: .
      dockerfile: java/Dockerfile.all-jdks
    volumes:
      - .:/arrow:ro  # ensures that docker won't contaminate the host directory
      - maven-cache:/root/.m2:delegated
  js:
    image: arrow:js
    build:
      context: .
      dockerfile: js/Dockerfile
  python:
    # Usage:
    #   export PYTHON_VERSION=2.7|3.6|3.7
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose run python
    image: arrow:python-${PYTHON_VERSION:-3.6}
    shm_size: 2G
    build:
      context: .
      dockerfile: python/Dockerfile
      args:
        PYTHON_VERSION: ${PYTHON_VERSION:-3.6}
    volumes: *ubuntu-volumes
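    # Instead of exporting, the version can also be set for a single
    # invocation (assuming a POSIX shell), e.g.:
    #   PYTHON_VERSION=3.7 docker-compose build python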
  python-alpine:
    # Usage:
    #   export PYTHON_VERSION=2.7|3.6 (minor version is ignored)
    #   docker-compose build cpp-alpine
    #   docker-compose build python-alpine
    #   docker-compose run python-alpine
    image: arrow:python-${PYTHON_VERSION:-3.6}-alpine
    shm_size: 2G
    build:
      context: .
      dockerfile: python/Dockerfile.alpine
      args:
        PYTHON_VERSION: ${PYTHON_VERSION:-3.6}
    volumes: *alpine-volumes
  python-nopandas:
    # Usage:
    #   export PYTHON_VERSION=2.7|3.6|3.7
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build python-nopandas
    #   docker-compose run python-nopandas
    image: arrow:python-${PYTHON_VERSION:-3.6}-nopandas
    shm_size: 2G
    build:
      context: .
      dockerfile: python/Dockerfile.nopandas
      args:
        PYTHON_VERSION: ${PYTHON_VERSION:-3.6}
    volumes: *ubuntu-volumes
  rust:
    # Usage:
    #   docker-compose build rust
    #   docker-compose run rust
    image: arrow:rust
    build:
      context: .
      dockerfile: rust/Dockerfile
    environment:
      PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data
    volumes: *ubuntu-volumes
  r:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose build r
    #   docker-compose run r
    image: arrow:r
    build:
      context: .
      dockerfile: r/Dockerfile
    volumes: *ubuntu-volumes
  r-sanitizer:
    # Usage:
    #   docker-compose build r-sanitizer
    #   docker-compose run r-sanitizer
    image: arrow:r-sanitizer
    cap_add:
      # LeakSanitizer and gdb require ptrace(2)
      - SYS_PTRACE
    build:
      context: .
      dockerfile: r/Dockerfile.sanitizer
    volumes: *ubuntu-volumes
  r-conda:
    # Usage:
    #   export R_VERSION=3.5.1
    #   docker-compose build cpp
    #   docker-compose build r-conda
    #   docker-compose run r-conda
    image: arrow:r-conda-${R_VERSION:-3.5.1}
    shm_size: 2G
    build:
      context: .
      dockerfile: r/Dockerfile.conda
      args:
        R_VERSION: ${R_VERSION:-3.5.1}
    volumes: *ubuntu-volumes
  ######################### Tools and Linters #################################
  # TODO(kszucs): site
  # TODO(kszucs): {cpp,java,glib,js}-apidoc
  lint:
    # Usage:
    #   export PYTHON_VERSION=3.6
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build lint
    #   docker-compose run lint
    image: arrow:lint
    build:
      context: .
      dockerfile: dev/lint/Dockerfile
    command: arrow/dev/lint/run_linters.sh
    volumes: *ubuntu-volumes
  fuzzit:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose build fuzzit
    #   docker-compose run fuzzit
    image: arrow:fuzzit
    build:
      context: .
      dockerfile: dev/fuzzit/Dockerfile
    environment:
      CI_ARROW_SHA: ${CI_ARROW_SHA}
      CI_ARROW_BRANCH: ${CI_ARROW_BRANCH}
    volumes: *ubuntu-volumes
  iwyu:
    # Usage:
    #   export PYTHON_VERSION=3.6
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build lint
    #   docker-compose run iwyu
    image: arrow:lint
    command: arrow/dev/lint/run_iwyu.sh
    volumes: *ubuntu-volumes
  clang-format:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build lint
    #   docker-compose run clang-format
    image: arrow:lint
    command: arrow/dev/lint/run_clang_format.sh
    volumes: *ubuntu-volumes
  clang-tidy:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build lint
    #   docker-compose run clang-tidy
    image: arrow:lint
    command: arrow/dev/lint/run_clang_tidy.sh
    volumes: *ubuntu-volumes
  docs:
    # Usage:
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build docs
    #   docker-compose run docs
    image: arrow:docs
    build:
      context: .
      dockerfile: docs/Dockerfile
    volumes: *ubuntu-volumes
  ######################### Package Builder ###################################
  python-manylinux1:
    # Usage:
    #   either build:
    #     $ docker-compose build python-manylinux1
    #   or pull:
    #     $ docker-compose pull python-manylinux1
    #   and then run:
    #     $ docker-compose run -e PYTHON_VERSION=3.7 python-manylinux1
    image: ursalab/arrow_manylinux1_x86_64_base:0.14.1-static-zlib
    build:
      context: python/manylinux1
      dockerfile: Dockerfile-x86_64_base
    shm_size: 2G
    environment:
      PYARROW_PARALLEL: 3
      PYTHON_VERSION: ${PYTHON_VERSION:-3.6}
      UNICODE_WIDTH: ${UNICODE_WIDTH:-16}
    volumes:
      - .:/arrow:delegated
      - ./python/manylinux1:/io:delegated
    command: /io/build_arrow.sh
  python-manylinux2010:
    # Usage:
    #   either build:
    #     $ docker-compose build python-manylinux2010
    #   or pull:
    #     $ docker-compose pull python-manylinux2010
    #   and then run:
    #     $ docker-compose run -e PYTHON_VERSION=3.7 python-manylinux2010
    image: ursalab/arrow_manylinux2010_x86_64_base:0.14.1-static-zlib
    build:
      context: python/manylinux2010
      dockerfile: Dockerfile-x86_64_base
    shm_size: 2G
    environment:
      PYARROW_PARALLEL: 3
      PYTHON_VERSION: ${PYTHON_VERSION:-3.6}
      UNICODE_WIDTH: ${UNICODE_WIDTH:-16}
    volumes:
      - .:/arrow:delegated
      - ./python/manylinux2010:/io:delegated
    command: /io/build_arrow.sh
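  # Wheels for several Python versions can be built in one go with a simple
  # shell loop (a sketch; the listed versions are illustrative):
  #
  #   for version in 2.7 3.6 3.7; do
  #     docker-compose run -e PYTHON_VERSION=$version python-manylinux2010
  #   done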
  ######################### Integration Tests #################################
  # impala:
  #   image: cpcloud86/impala:java8-1
  #   ports:
  #     - "21050"
  #   hostname: impala
  pandas-master:
    # Usage:
    #   export PYTHON_VERSION=3.6
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build --no-cache pandas-master
    #   docker-compose run pandas-master
    image: arrow:pandas-master
    build:
      context: .
      dockerfile: integration/pandas/Dockerfile
    shm_size: 2G
    volumes: *ubuntu-volumes
  hdfs-namenode:
    image: gelog/hadoop
    shm_size: 2G
    ports:
      - "9000:9000"
      - "50070:50070"
    command: hdfs namenode
    hostname: hdfs-namenode
  hdfs-datanode-1:
    image: gelog/hadoop
    command: hdfs datanode
    ports:
      # The host port is randomly assigned by Docker, to allow scaling
      # to multiple DataNodes on the same host
      - "50075"
    links:
      - hdfs-namenode:hdfs-namenode
  hdfs-datanode-2:
    image: gelog/hadoop
    command: hdfs datanode
    ports:
      # The host port is randomly assigned by Docker, to allow scaling
      # to multiple DataNodes on the same host
      - "50075"
    links:
      - hdfs-namenode:hdfs-namenode
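  # Additional DataNodes can be added by duplicating the block above under a
  # new key (e.g. a hypothetical hdfs-datanode-3); since Docker assigns each
  # one a random host port for 50075, the containers won't collide.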
  hdfs-integration:
    # Usage:
    #   export PYTHON_VERSION=3.6
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build hdfs-integration
    #   docker-compose run hdfs-integration
    image: arrow:hdfs-${HDFS_VERSION:-2.9.2}
    links:
      - hdfs-namenode:hdfs-namenode
      - hdfs-datanode-1:hdfs-datanode-1
      - hdfs-datanode-2:hdfs-datanode-2
    environment:
      - ARROW_HDFS_TEST_HOST=hdfs-namenode
      - ARROW_HDFS_TEST_PORT=9000
      - ARROW_HDFS_TEST_USER=root
    build:
      context: .
      dockerfile: integration/hdfs/Dockerfile
      args:
        HDFS_VERSION: ${HDFS_VERSION:-2.9.2}
    volumes: *ubuntu-volumes
  # TODO(kszucs): pass dask version explicitly as a build argument
  dask-integration:
    # Usage:
    #   export PYTHON_VERSION=3.6
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build dask-integration
    #   docker-compose run dask-integration
    image: arrow:dask-integration
    build:
      context: .
      dockerfile: integration/dask/Dockerfile
    volumes: *ubuntu-volumes
  turbodbc-integration:
    # Usage:
    #   export PYTHON_VERSION=3.6
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build turbodbc-integration
    #   docker-compose run turbodbc-integration
    image: arrow:turbodbc-integration
    build:
      context: .
      dockerfile: integration/turbodbc/Dockerfile
    volumes: *ubuntu-volumes
  spark-integration:
    # Usage:
    #   export PYTHON_VERSION=3.6
    #   docker-compose build cpp
    #   docker-compose build python
    #   docker-compose build spark-integration
    #   docker-compose run spark-integration
    image: arrow:spark-${SPARK_VERSION:-master}
    environment:
      - SPARK_VERSION=${SPARK_VERSION:-master}
    build:
      context: .
      dockerfile: integration/spark/Dockerfile
    volumes:
      - .:/arrow:ro  # ensures that docker won't contaminate the host directory
      - ubuntu-cache:/build:delegated
      - maven-cache:/root/.m2:delegated
      - spark-cache:/spark:delegated
  # TODO(kszucs): hive-integration
  ################################# Release ###################################
  release-source:
    image: arrow:release-source
    build:
      context: .
      dockerfile: dev/release/source/Dockerfile
    volumes:
      - .:/arrow:delegated
  ######################## Verification Containers ############################
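  # These bare distribution images mount the source tree at /arrow so release
  # verification can be run interactively, e.g. (a sketch):
  #
  #   docker-compose run ubuntu-bionic /bin/bash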
  debian-stretch:
    image: debian:stretch
    volumes:
      - .:/arrow:delegated
  debian-buster:
    image: debian:buster
    volumes:
      - .:/arrow:delegated
  ubuntu-trusty:
    image: ubuntu:trusty
    volumes:
      - .:/arrow:delegated
  ubuntu-xenial:
    image: ubuntu:xenial
    volumes:
      - .:/arrow:delegated
  ubuntu-bionic:
    image: ubuntu:bionic
    volumes:
      - .:/arrow:delegated
  ubuntu-disco:
    image: ubuntu:disco
    volumes:
      - .:/arrow:delegated
  centos-6:
    image: centos:6
    volumes:
      - .:/arrow:delegated
  centos-7:
    image: centos:7
    volumes:
      - .:/arrow:delegated