blob: 18fb20feefb3604723c2a90236f9de266cecf21e [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT.
#
ARG PYTHON_BASE_IMAGE="python:3.7-slim-bullseye"
FROM ${PYTHON_BASE_IMAGE} as main
# Nolog bash flag is currently ignored - but you can replace it with other flags (for example
# xtrace - to show commands executed)
SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"]
ARG PYTHON_BASE_IMAGE
ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow"
# By increasing this number we can do force build of all dependencies
ARG DEPENDENCIES_EPOCH_NUMBER="6"
# Make sure noninteractive debian install is used and language variables set
ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \
DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \
LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \
DEPENDENCIES_EPOCH_NUMBER=${DEPENDENCIES_EPOCH_NUMBER} \
INSTALL_MYSQL_CLIENT="true" \
INSTALL_MSSQL_CLIENT="true" \
INSTALL_POSTGRES_CLIENT="true"
RUN echo "Base image version: ${PYTHON_BASE_IMAGE}"
ARG ADDITIONAL_DEV_APT_DEPS=""
ARG DEV_APT_COMMAND="\
curl --silent --fail --location https://deb.nodesource.com/setup_14.x | bash - \
&& curl --silent --fail https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - >/dev/null 2>&1 \
&& echo 'deb https://dl.yarnpkg.com/debian/ stable main' > /etc/apt/sources.list.d/yarn.list"
ARG ADDITIONAL_DEV_APT_COMMAND=""
ARG ADDITIONAL_DEV_ENV_VARS=""
ENV DEV_APT_COMMAND=${DEV_APT_COMMAND} \
ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS} \
ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND}
COPY scripts/docker/determine_debian_version_specific_variables.sh /scripts/docker/
# Install basic and additional apt dependencies
RUN apt-get update \
&& apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1 \
&& apt-get install -y --no-install-recommends curl gnupg2 lsb-release \
&& mkdir -pv /usr/share/man/man1 \
&& mkdir -pv /usr/share/man/man7 \
&& export ${ADDITIONAL_DEV_ENV_VARS?} \
&& source /scripts/docker/determine_debian_version_specific_variables.sh \
&& bash -o pipefail -o errexit -o nounset -o nolog -c "${DEV_APT_COMMAND}" \
&& bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_DEV_APT_COMMAND}" \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
apt-utils \
build-essential \
dirmngr \
dumb-init \
freetds-bin \
freetds-dev \
git \
graphviz \
gosu \
libffi-dev \
libldap2-dev \
libkrb5-dev \
libpq-dev \
libsasl2-2 \
libsasl2-dev \
libsasl2-modules \
libssl-dev \
"${DISTRO_LIBENCHANT}" \
locales \
netcat \
nodejs \
rsync \
sasl2-bin \
sudo \
unixodbc \
unixodbc-dev \
yarn \
${ADDITIONAL_DEV_APT_DEPS} \
&& apt-get autoremove -yqq --purge \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Only copy mysql/mssql installation scripts for now - so that changing the other
# scripts which are needed much later will not invalidate the docker layer here.
COPY scripts/docker/install_mysql.sh scripts/docker/install_mssql.sh scripts/docker/install_postgres.sh /scripts/docker/
# We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an
# unexpected result - the cache for Dockerfiles might get invalidated in case the host system
# had different umask set and group x bit was not set. In Azure the bit might be not set at all.
# That also protects against AUFS Docker backen dproblem where changing the executable bit required sync
RUN bash /scripts/docker/install_mysql.sh prod \
&& bash /scripts/docker/install_mysql.sh dev \
&& bash /scripts/docker/install_mssql.sh \
&& bash /scripts/docker/install_postgres.sh dev \
&& adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \
--quiet "airflow" --home "/home/airflow" \
&& echo -e "airflow\nairflow" | passwd airflow 2>&1 \
&& echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
&& chmod 0440 /etc/sudoers.d/airflow
ARG RUNTIME_APT_DEPS="\
apt-transport-https \
bash-completion \
ca-certificates \
software-properties-common \
krb5-user \
krb5-user \
ldap-utils \
less \
lsb-release \
net-tools \
openssh-client \
openssh-server \
postgresql-client \
sqlite3 \
tmux \
unzip \
vim \
xxd"
# Install Helm
ARG HELM_VERSION="v3.6.3"
RUN SYSTEM=$(uname -s | tr '[:upper:]' '[:lower:]') \
&& PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" ) \
&& HELM_URL="https://get.helm.sh/helm-${HELM_VERSION}-${SYSTEM}-${PLATFORM}.tar.gz" \
&& curl --silent --location "${HELM_URL}" | tar -xz -O "${SYSTEM}-${PLATFORM}/helm" > /usr/local/bin/helm \
&& chmod +x /usr/local/bin/helm
ARG ADDITIONAL_RUNTIME_APT_DEPS=""
ARG RUNTIME_APT_COMMAND=""
ARG ADDITIONAL_RUNTIME_APT_COMMAND=""
ARG ADDITIONAL_DEV_APT_ENV=""
ARG ADDITIONAL_RUNTIME_APT_ENV=""
ARG DOCKER_CLI_VERSION=19.03.9
ARG HOME=/root
ARG AIRFLOW_HOME=/root/airflow
ARG AIRFLOW_SOURCES=/opt/airflow
ENV RUNTIME_APT_DEP=${RUNTIME_APT_DEPS} \
ADDITIONAL_RUNTIME_APT_DEPS=${ADDITIONAL_RUNTIME_APT_DEPS} \
RUNTIME_APT_COMMAND=${RUNTIME_APT_COMMAND} \
ADDITIONAL_RUNTIME_APT_COMMAND=${ADDITIONAL_RUNTIME_APT_COMMAND}\
DOCKER_CLI_VERSION=${DOCKER_CLI_VERSION} \
HOME=${HOME} \
AIRFLOW_HOME=${AIRFLOW_HOME} \
AIRFLOW_SOURCES=${AIRFLOW_SOURCES}
RUN export ${ADDITIONAL_DEV_APT_ENV?} \
&& export ${ADDITIONAL_RUNTIME_APT_ENV?} \
&& source /scripts/docker/determine_debian_version_specific_variables.sh \
&& bash -o pipefail -o errexit -o nounset -o nolog -c "${RUNTIME_APT_COMMAND}" \
&& bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_RUNTIME_APT_COMMAND}" \
&& apt-get update \
&& apt-get install --no-install-recommends -y \
"${DISTRO_LIBGCC}" \
${RUNTIME_APT_DEPS} \
${ADDITIONAL_RUNTIME_APT_DEPS} \
&& apt-get autoremove -yqq --purge \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
&& curl --silent "https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_CLI_VERSION}.tgz" \
| tar -C /usr/bin --strip-components=1 -xvzf - docker/docker
WORKDIR ${AIRFLOW_SOURCES}
RUN mkdir -pv ${AIRFLOW_HOME} && \
mkdir -pv ${AIRFLOW_HOME}/dags && \
mkdir -pv ${AIRFLOW_HOME}/logs
ARG AIRFLOW_REPO=apache/airflow
ARG AIRFLOW_BRANCH=main
# Airflow Extras installed
ARG AIRFLOW_EXTRAS="all"
ARG ADDITIONAL_AIRFLOW_EXTRAS=""
# Allows to override constraints source
ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow"
ARG AIRFLOW_CONSTRAINTS="constraints"
ARG AIRFLOW_CONSTRAINTS_REFERENCE=""
ARG AIRFLOW_CONSTRAINTS_LOCATION=""
ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main"
# By changing the epoch we can force reinstalling Airflow and pip all dependencies
# It can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable.
ARG AIRFLOW_CI_BUILD_EPOCH="3"
ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true"
# By default in the image, we are installing all providers when installing from sources
ARG INSTALL_PROVIDERS_FROM_SOURCES="true"
ARG INSTALL_FROM_PYPI="true"
ARG AIRFLOW_PIP_VERSION=22.0.4
# Setup PIP
# By default PIP install run without cache to make image smaller
ARG PIP_NO_CACHE_DIR="true"
# By default PIP has progress bar but you can disable it.
ARG PIP_PROGRESS_BAR="on"
# Optimizing installation of Cassandra driver (in case there are no prebuilt wheels which is the
# case as of 20.04.2021 with Python 3.9
# Speeds up building the image - cassandra driver without CYTHON saves around 10 minutes
ARG CASS_DRIVER_NO_CYTHON="1"
# Build cassandra driver on multiple CPUs
ARG CASS_DRIVER_BUILD_CONCURRENCY="8"
ARG AIRFLOW_VERSION="2.3.0.dev"
ENV AIRFLOW_REPO=${AIRFLOW_REPO}\
AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \
CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \
AIRFLOW_CONSTRAINTS=${AIRFLOW_CONSTRAINTS} \
AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \
AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \
DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \
AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \
AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \
INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \
INSTALL_FROM_PYPI=${INSTALL_FROM_PYPI} \
AIRFLOW_VERSION=${AIRFLOW_VERSION} \
AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
# In the CI image we always:
# * install MySQL, MsSQL
# * install airflow from current sources, not from PyPI package
# * install airflow without `--user` flag
# * install airflow in editable mode
# * install always current version of airflow
INSTALL_MYSQL_CLIENT="true" \
INSTALL_MSSQL_CLIENT="true" \
INSTALL_POSTGRES_CLIENT="true" \
AIRFLOW_INSTALLATION_METHOD="." \
AIRFLOW_INSTALL_EDITABLE_FLAG="--editable" \
AIRFLOW_VERSION_SPECIFICATION="" \
PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} \
PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \
CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY} \
CASS_DRIVER_NO_CYTHON=${CASS_DRIVER_NO_CYTHON}
RUN echo "Airflow version: ${AIRFLOW_VERSION}"
# Those are additional constraints that are needed for some extras but we do not want to
# force them on the main Airflow package. Those limitations are:
# * certifi<2021.0.0: required by snowflake provider
# * dill<0.3.3 required by apache-beam
# * google-ads<14.0.1 required to prevent updating google-python-api>=2.0.0
ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="dill<0.3.3 certifi<2021.0.0 google-ads<14.0.1"
ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
ENV EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} \
UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
# Copy all scripts required for installation - changing any of those should lead to
# rebuilding from here
COPY scripts/docker/install_pip_version.sh scripts/docker/install_airflow_dependencies_from_branch_tip.sh \
scripts/docker/common.sh \
/scripts/docker/
# We are first creating a venv where all python packages and .so binaries needed by those are
# installed.
# In case of CI builds we want to pre-install main version of airflow dependencies so that
# We do not have to always reinstall it from the scratch.
# And is automatically reinstalled from the scratch every time patch release of python gets released
# The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false")
# are uninstalled, only dependencies remain.
# the cache is only used when "upgrade to newer dependencies" is not set to automatically
# account for removed dependencies (we do not install them in the first place)
RUN echo -e "\n\e[32mThe 'Running pip as the root user' warnings below are not valid but we can't disable them :(\e[0m\n"; \
echo -e "\n\e[34mSee https://github.com/pypa/pip/issues/10556 for details.\e[0m\n" ; \
bash /scripts/docker/install_pip_version.sh; \
if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \
${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \
bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \
fi
# The PATH is needed for PIPX to find the tools installed
ENV PATH="/root/.local/bin:${PATH}"
COPY scripts/docker/install_pipx_tools.sh /scripts/docker/
# Install useful command line tools in their own virtualenv so that they do not clash with
# dependencies installed in Airflow
RUN bash /scripts/docker/install_pipx_tools.sh
# Copy package.json and yarn.lock to install node modules
# this way even if other static check files change, node modules will not need to be installed
# we want to keep node_modules so we can do this step separately from compiling assets
COPY airflow/www/package.json airflow/www/yarn.lock ${AIRFLOW_SOURCES}/airflow/www/
COPY scripts/docker/prepare_node_modules.sh /scripts/docker/
# Package JS/css for production
RUN bash /scripts/docker/prepare_node_modules.sh
# Copy all the needed www/ for assets compilation. Done as two separate COPY
# commands so as otherwise it copies the _contents_ of static/ in to www/
COPY airflow/www/webpack.config.js ${AIRFLOW_SOURCES}/airflow/www/
COPY airflow/www/static ${AIRFLOW_SOURCES}/airflow/www/static/
COPY scripts/docker/compile_www_assets.sh /scripts/docker/
# Build artifacts without removing temporary artifacts (we will need them for incremental changes)
# in build mode
RUN REMOVE_ARTIFACTS="false" BUILD_TYPE="build" bash /scripts/docker/compile_www_assets.sh
# Airflow sources change frequently but dependency configuration won't change that often
# We copy setup.py and other files needed to perform setup of dependencies
# So in case setup.py changes we can install latest dependencies required.
COPY setup.py ${AIRFLOW_SOURCES}/setup.py
COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg
COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/__init__.py
COPY scripts/docker/install_airflow.sh /scripts/docker/
# The goal of this line is to install the dependencies from the most current setup.py from sources
# This will be usually incremental small set of packages in CI optimized build, so it will be very fast
# In non-CI optimized build this will install all dependencies before installing sources.
# Usually we will install versions based on the dependencies in setup.py and upgraded only if needed.
# But in cron job we will install latest versions matching setup.py to see if there is no breaking change
# and push the constraints if everything is successful
RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
bash /scripts/docker/install_airflow.sh; \
fi
COPY scripts/in_container/entrypoint_ci.sh /entrypoint
RUN chmod a+x /entrypoint
COPY scripts/docker/install_pip_version.sh scripts/docker/install_additional_dependencies.sh /scripts/docker/
# Additional python deps to install
ARG ADDITIONAL_PYTHON_DEPS=""
RUN bash /scripts/docker/install_pip_version.sh; \
if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
bash /scripts/docker/install_additional_dependencies.sh; \
fi
# Install autocomplete for airflow
RUN if command -v airflow; then \
register-python-argcomplete airflow >> ~/.bashrc ; \
fi
# Install autocomplete for Kubectl
RUN echo "source /etc/bash_completion" >> ~/.bashrc
# We can copy everything here. The Context is filtered by dockerignore. This makes sure we are not
# copying over stuff that is accidentally generated or that we do not need (such as egg-info)
# if you want to add something that is missing and you expect to see it in the image you can
# add it with ! in .dockerignore next to the airflow, test etc. directories there
COPY . ${AIRFLOW_SOURCES}/
WORKDIR ${AIRFLOW_SOURCES}
ARG BUILD_ID
ARG COMMIT_SHA
ARG AIRFLOW_IMAGE_DATE_CREATED
ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${PATH}" \
GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm/" \
BUILD_ID=${BUILD_ID} \
COMMIT_SHA=${COMMIT_SHA}
# Link dumb-init for backwards compatibility (so that older images also work)
RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init
EXPOSE 8080
LABEL org.apache.airflow.distro="debian" \
org.apache.airflow.module="airflow" \
org.apache.airflow.component="airflow" \
org.apache.airflow.image="airflow-ci" \
org.apache.airflow.version="${AIRFLOW_VERSION}" \
org.apache.airflow.uid="0" \
org.apache.airflow.gid="0" \
org.apache.airflow.build-id="${BUILD_ID}" \
org.apache.airflow.commit-sha="${COMMIT_SHA}" \
org.opencontainers.image.source="${AIRFLOW_IMAGE_REPOSITORY}" \
org.opencontainers.image.created="${AIRFLOW_IMAGE_DATE_CREATED}" \
org.opencontainers.image.authors="dev@airflow.apache.org" \
org.opencontainers.image.url="https://airflow.apache.org" \
org.opencontainers.image.documentation="https://github.com/apache/airflow/IMAGES.rst" \
org.opencontainers.image.source="https://github.com/apache/airflow" \
org.opencontainers.image.version="${AIRFLOW_VERSION}" \
org.opencontainers.image.revision="${COMMIT_SHA}" \
org.opencontainers.image.vendor="Apache Software Foundation" \
org.opencontainers.image.licenses="Apache-2.0" \
org.opencontainers.image.ref.name="airflow-ci-image" \
org.opencontainers.image.title="Continuous Integration Airflow Image" \
org.opencontainers.image.description="Installed Apache Airflow with Continuous Integration dependencies"
ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
CMD []