# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT.
#
ARG PYTHON_BASE_IMAGE="python:3.7-slim-bullseye"
FROM ${PYTHON_BASE_IMAGE} AS main

# Nolog bash flag is currently ignored - but you can replace it with other flags (for example
# xtrace - to show commands executed)
SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"]

# An ARG declared before FROM is only visible in FROM lines - redeclare it inside the stage to use it
ARG PYTHON_BASE_IMAGE
ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow"

# By increasing this number we can do force build of all dependencies
ARG DEPENDENCIES_EPOCH_NUMBER="6"

# Make sure noninteractive debian install is used and language variables set
ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \
    DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \
    LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \
    DEPENDENCIES_EPOCH_NUMBER=${DEPENDENCIES_EPOCH_NUMBER} \
    INSTALL_MYSQL_CLIENT="true" \
    INSTALL_MSSQL_CLIENT="true" \
    INSTALL_POSTGRES_CLIENT="true"

RUN echo "Base image version: ${PYTHON_BASE_IMAGE}"

ARG ADDITIONAL_DEV_APT_DEPS=""
# Default dev command adds the nodesource (node 14) and yarn apt repositories needed to build www assets
ARG DEV_APT_COMMAND="\
    curl --silent --fail --location https://deb.nodesource.com/setup_14.x | bash - \
    && curl --silent --fail https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - >/dev/null 2>&1 \
    && echo 'deb https://dl.yarnpkg.com/debian/ stable main' > /etc/apt/sources.list.d/yarn.list"
ARG ADDITIONAL_DEV_APT_COMMAND=""
ARG ADDITIONAL_DEV_ENV_VARS=""

ENV DEV_APT_COMMAND=${DEV_APT_COMMAND} \
    ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS} \
    ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND}

COPY scripts/docker/determine_debian_version_specific_variables.sh /scripts/docker/

# Install basic and additional apt dependencies
# (package list kept alphabetically sorted for diffability)
RUN apt-get update \
    && apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1 \
    && apt-get install -y --no-install-recommends curl gnupg2 lsb-release \
    && mkdir -pv /usr/share/man/man1 \
    && mkdir -pv /usr/share/man/man7 \
    && export ${ADDITIONAL_DEV_ENV_VARS?} \
    && source /scripts/docker/determine_debian_version_specific_variables.sh \
    && bash -o pipefail -o errexit -o nounset -o nolog -c "${DEV_APT_COMMAND}" \
    && bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_DEV_APT_COMMAND}" \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
       apt-utils \
       build-essential \
       dirmngr \
       dumb-init \
       freetds-bin \
       freetds-dev \
       git \
       gosu \
       graphviz \
       libffi-dev \
       libkrb5-dev \
       libldap2-dev \
       libpq-dev \
       libsasl2-2 \
       libsasl2-dev \
       libsasl2-modules \
       libssl-dev \
       "${DISTRO_LIBENCHANT}" \
       locales \
       netcat \
       nodejs \
       rsync \
       sasl2-bin \
       sudo \
       unixodbc \
       unixodbc-dev \
       yarn \
       ${ADDITIONAL_DEV_APT_DEPS} \
    && apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Only copy mysql/mssql installation scripts for now - so that changing the other
# scripts which are needed much later will not invalidate the docker layer here.
COPY scripts/docker/install_mysql.sh scripts/docker/install_mssql.sh scripts/docker/install_postgres.sh /scripts/docker/
# We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an
# unexpected result - the cache for Dockerfiles might get invalidated in case the host system
# had different umask set and group x bit was not set. In Azure the bit might be not set at all.
# That also protects against the AUFS Docker backend problem where changing the executable bit requires sync
# NOTE: the plaintext "airflow" password + passwordless sudo are acceptable only because this is a
# development/CI image (see the warning at the top of the file) - never do this in a production image.
RUN bash /scripts/docker/install_mysql.sh prod \
    && bash /scripts/docker/install_mysql.sh dev \
    && bash /scripts/docker/install_mssql.sh \
    && bash /scripts/docker/install_postgres.sh dev \
    && adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \
       --quiet "airflow" --home "/home/airflow" \
    && echo -e "airflow\nairflow" | passwd airflow 2>&1 \
    && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
    && chmod 0440 /etc/sudoers.d/airflow

# Convenience/runtime packages for the CI image (krb5-user was previously listed twice - deduplicated)
ARG RUNTIME_APT_DEPS="\
    apt-transport-https \
    bash-completion \
    ca-certificates \
    krb5-user \
    ldap-utils \
    less \
    lsb-release \
    net-tools \
    openssh-client \
    openssh-server \
    postgresql-client \
    software-properties-common \
    sqlite3 \
    tmux \
    unzip \
    vim \
    xxd"

# Install Helm
ARG HELM_VERSION="v3.6.3"

# --fail makes curl exit non-zero on an HTTP error so that (with pipefail) a 404 body
# is never written to /usr/local/bin/helm as if it were the binary
RUN SYSTEM=$(uname -s | tr '[:upper:]' '[:lower:]') \
    && PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" ) \
    && HELM_URL="https://get.helm.sh/helm-${HELM_VERSION}-${SYSTEM}-${PLATFORM}.tar.gz" \
    && curl --silent --fail --location "${HELM_URL}" | tar -xz -O "${SYSTEM}-${PLATFORM}/helm" > /usr/local/bin/helm \
    && chmod +x /usr/local/bin/helm

ARG ADDITIONAL_RUNTIME_APT_DEPS=""
ARG RUNTIME_APT_COMMAND=""
ARG ADDITIONAL_RUNTIME_APT_COMMAND=""
ARG ADDITIONAL_DEV_APT_ENV=""
ARG ADDITIONAL_RUNTIME_APT_ENV=""

ARG DOCKER_CLI_VERSION=19.03.9
ARG HOME=/root
ARG AIRFLOW_HOME=/root/airflow
ARG AIRFLOW_SOURCES=/opt/airflow

# NOTE: fixed typo - this variable was previously exported as RUNTIME_APT_DEP (missing trailing S)
ENV RUNTIME_APT_DEPS=${RUNTIME_APT_DEPS} \
    ADDITIONAL_RUNTIME_APT_DEPS=${ADDITIONAL_RUNTIME_APT_DEPS} \
    RUNTIME_APT_COMMAND=${RUNTIME_APT_COMMAND} \
    ADDITIONAL_RUNTIME_APT_COMMAND=${ADDITIONAL_RUNTIME_APT_COMMAND} \
    DOCKER_CLI_VERSION=${DOCKER_CLI_VERSION} \
    HOME=${HOME} \
    AIRFLOW_HOME=${AIRFLOW_HOME} \
    AIRFLOW_SOURCES=${AIRFLOW_SOURCES}

# Install runtime apt dependencies and the standalone docker CLI binary.
# The docker download uses $(uname -m) (x86_64/aarch64 - the names used by download.docker.com)
# instead of a hardcoded x86_64 so that arm builds work, consistent with the Helm step above;
# --fail aborts the build on an HTTP error instead of feeding an error page to tar.
RUN export ${ADDITIONAL_DEV_APT_ENV?} \
    && export ${ADDITIONAL_RUNTIME_APT_ENV?} \
    && source /scripts/docker/determine_debian_version_specific_variables.sh \
    && bash -o pipefail -o errexit -o nounset -o nolog -c "${RUNTIME_APT_COMMAND}" \
    && bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_RUNTIME_APT_COMMAND}" \
    && apt-get update \
    && apt-get install --no-install-recommends -y \
       "${DISTRO_LIBGCC}" \
       ${RUNTIME_APT_DEPS} \
       ${ADDITIONAL_RUNTIME_APT_DEPS} \
    && apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && curl --silent --fail --location \
       "https://download.docker.com/linux/static/stable/$(uname -m)/docker-${DOCKER_CLI_VERSION}.tgz" \
       | tar -C /usr/bin --strip-components=1 -xvzf - docker/docker

WORKDIR ${AIRFLOW_SOURCES}

RUN mkdir -pv ${AIRFLOW_HOME} && \
    mkdir -pv ${AIRFLOW_HOME}/dags && \
    mkdir -pv ${AIRFLOW_HOME}/logs

ARG AIRFLOW_REPO=apache/airflow
ARG AIRFLOW_BRANCH=main
# Airflow Extras installed
ARG AIRFLOW_EXTRAS="all"
ARG ADDITIONAL_AIRFLOW_EXTRAS=""
# Allows to override constraints source
ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow"
ARG AIRFLOW_CONSTRAINTS="constraints"
ARG AIRFLOW_CONSTRAINTS_REFERENCE=""
ARG AIRFLOW_CONSTRAINTS_LOCATION=""
ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main"
# By changing the epoch we can force reinstalling Airflow and pip all dependencies
# It can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable.
ARG AIRFLOW_CI_BUILD_EPOCH="3"
ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true"
# By default in the image, we are installing all providers when installing from sources
ARG INSTALL_PROVIDERS_FROM_SOURCES="true"
ARG INSTALL_FROM_PYPI="true"
ARG AIRFLOW_PIP_VERSION=22.0.4
# Setup PIP
# By default PIP install runs without cache to make image smaller
ARG PIP_NO_CACHE_DIR="true"
# By default PIP has progress bar but you can disable it.
ARG PIP_PROGRESS_BAR="on"
# Optimizing installation of Cassandra driver (in case there are no prebuilt wheels which is the
# case as of 20.04.2021 with Python 3.9)
# Speeds up building the image - cassandra driver without CYTHON saves around 10 minutes
ARG CASS_DRIVER_NO_CYTHON="1"
# Build cassandra driver on multiple CPUs
ARG CASS_DRIVER_BUILD_CONCURRENCY="8"

ARG AIRFLOW_VERSION="2.3.0.dev"

ENV AIRFLOW_REPO=${AIRFLOW_REPO} \
    AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \
    AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \
    CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \
    AIRFLOW_CONSTRAINTS=${AIRFLOW_CONSTRAINTS} \
    AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \
    AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \
    DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \
    AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \
    AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \
    INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \
    INSTALL_FROM_PYPI=${INSTALL_FROM_PYPI} \
    AIRFLOW_VERSION=${AIRFLOW_VERSION} \
    AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
    # In the CI image we always:
    # * install MySQL, MsSQL
    # * install airflow from current sources, not from PyPI package
    # * install airflow without `--user` flag
    # * install airflow in editable mode
    # * install always current version of airflow
    INSTALL_MYSQL_CLIENT="true" \
    INSTALL_MSSQL_CLIENT="true" \
    INSTALL_POSTGRES_CLIENT="true" \
    AIRFLOW_INSTALLATION_METHOD="." \
    AIRFLOW_INSTALL_EDITABLE_FLAG="--editable" \
    AIRFLOW_VERSION_SPECIFICATION="" \
    PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} \
    PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \
    CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY} \
    CASS_DRIVER_NO_CYTHON=${CASS_DRIVER_NO_CYTHON}

RUN echo "Airflow version: ${AIRFLOW_VERSION}"

# Those are additional constraints that are needed for some extras but we do not want to
# force them on the main Airflow package. Those limitations are:
#   * certifi<2021.0.0: required by snowflake provider
#   * dill<0.3.3: required by apache-beam
#   * google-ads<14.0.1: required to prevent updating google-python-api>=2.0.0
ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="dill<0.3.3 certifi<2021.0.0 google-ads<14.0.1"
ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
ENV EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} \
    UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}

# Copy all scripts required for installation - changing any of those should lead to
# rebuilding from here
COPY scripts/docker/install_pip_version.sh scripts/docker/install_airflow_dependencies_from_branch_tip.sh \
     scripts/docker/common.sh \
     /scripts/docker/

# We are first creating a venv where all python packages and .so binaries needed by those are
# installed.
# In case of CI builds we want to pre-install main version of airflow dependencies so that
# we do not have to always reinstall them from scratch.
# They are automatically reinstalled from scratch every time a patch release of python gets released.
# The Airflow package (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false")
# is uninstalled afterwards, only dependencies remain.
# The cache is only used when "upgrade to newer dependencies" is not set, to automatically
# account for removed dependencies (we do not install them in the first place)
RUN echo -e "\n\e[32mThe 'Running pip as the root user' warnings below are not valid but we can't disable them :(\e[0m\n"; \
    echo -e "\n\e[34mSee https://github.com/pypa/pip/issues/10556 for details.\e[0m\n" ; \
    bash /scripts/docker/install_pip_version.sh; \
    if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \
          ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \
        bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \
    fi

# The PATH is needed for PIPX to find the tools installed
ENV PATH="/root/.local/bin:${PATH}"

COPY scripts/docker/install_pipx_tools.sh /scripts/docker/

# Install useful command line tools in their own virtualenv so that they do not clash with
# dependencies installed in Airflow
RUN bash /scripts/docker/install_pipx_tools.sh

# Copy package.json and yarn.lock to install node modules
# this way even if other static check files change, node modules will not need to be installed
# we want to keep node_modules so we can do this step separately from compiling assets
COPY airflow/www/package.json airflow/www/yarn.lock ${AIRFLOW_SOURCES}/airflow/www/
COPY scripts/docker/prepare_node_modules.sh /scripts/docker/

# Package JS/css for production
RUN bash /scripts/docker/prepare_node_modules.sh

# Copy all the needed www/ files for assets compilation. Done as two separate COPY
# commands as otherwise it would copy the _contents_ of static/ into www/
COPY airflow/www/webpack.config.js ${AIRFLOW_SOURCES}/airflow/www/
COPY airflow/www/static ${AIRFLOW_SOURCES}/airflow/www/static/
COPY scripts/docker/compile_www_assets.sh /scripts/docker/

# Build artifacts without removing temporary artifacts (we will need them for incremental changes)
# in build mode
RUN REMOVE_ARTIFACTS="false" BUILD_TYPE="build" bash /scripts/docker/compile_www_assets.sh

# Airflow sources change frequently but dependency configuration won't change that often
# We copy setup.py and other files needed to perform setup of dependencies
# So in case setup.py changes we can install latest dependencies required.
COPY setup.py ${AIRFLOW_SOURCES}/setup.py
COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg

COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/__init__.py

COPY scripts/docker/install_airflow.sh /scripts/docker/

# The goal of this line is to install the dependencies from the most current setup.py from sources
# This will be usually incremental small set of packages in CI optimized build, so it will be very fast
# In non-CI optimized build this will install all dependencies before installing sources.
# Usually we will install versions based on the dependencies in setup.py and upgraded only if needed.
# But in cron job we will install latest versions matching setup.py to see if there is no breaking change
# and push the constraints if everything is successful
RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
        bash /scripts/docker/install_airflow.sh; \
    fi

COPY scripts/in_container/entrypoint_ci.sh /entrypoint
RUN chmod a+x /entrypoint

COPY scripts/docker/install_pip_version.sh scripts/docker/install_additional_dependencies.sh /scripts/docker/

# Additional python deps to install
ARG ADDITIONAL_PYTHON_DEPS=""

RUN bash /scripts/docker/install_pip_version.sh; \
    if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
        bash /scripts/docker/install_additional_dependencies.sh; \
    fi

# Install autocomplete for airflow
# (command -v output is silenced so the resolved path is not printed to the build log)
RUN if command -v airflow > /dev/null 2>&1; then \
        register-python-argcomplete airflow >> ~/.bashrc ; \
    fi

# Install autocomplete for Kubectl
RUN echo "source /etc/bash_completion" >> ~/.bashrc

# We can copy everything here. The Context is filtered by dockerignore. This makes sure we are not
# copying over stuff that is accidentally generated or that we do not need (such as egg-info)
# if you want to add something that is missing and you expect to see it in the image you can
# add it with ! in .dockerignore next to the airflow, test etc. directories there
COPY . ${AIRFLOW_SOURCES}/

WORKDIR ${AIRFLOW_SOURCES}

# These change on every build - kept as late as possible so they do not invalidate earlier layers
ARG BUILD_ID
ARG COMMIT_SHA
ARG AIRFLOW_IMAGE_DATE_CREATED

ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${PATH}" \
    GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm/" \
    BUILD_ID=${BUILD_ID} \
    COMMIT_SHA=${COMMIT_SHA}

# Link dumb-init for backwards compatibility (so that older images also work)
RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init

EXPOSE 8080

# NOTE: org.opencontainers.image.source was previously declared twice in this LABEL - the later,
# hardcoded occurrence silently overrode the ${AIRFLOW_IMAGE_REPOSITORY}-parameterized one.
# Only the parameterized value is kept.
LABEL org.apache.airflow.distro="debian" \
    org.apache.airflow.module="airflow" \
    org.apache.airflow.component="airflow" \
    org.apache.airflow.image="airflow-ci" \
    org.apache.airflow.version="${AIRFLOW_VERSION}" \
    org.apache.airflow.uid="0" \
    org.apache.airflow.gid="0" \
    org.apache.airflow.build-id="${BUILD_ID}" \
    org.apache.airflow.commit-sha="${COMMIT_SHA}" \
    org.opencontainers.image.source="${AIRFLOW_IMAGE_REPOSITORY}" \
    org.opencontainers.image.created="${AIRFLOW_IMAGE_DATE_CREATED}" \
    org.opencontainers.image.authors="dev@airflow.apache.org" \
    org.opencontainers.image.url="https://airflow.apache.org" \
    org.opencontainers.image.documentation="https://github.com/apache/airflow/blob/main/IMAGES.rst" \
    org.opencontainers.image.version="${AIRFLOW_VERSION}" \
    org.opencontainers.image.revision="${COMMIT_SHA}" \
    org.opencontainers.image.vendor="Apache Software Foundation" \
    org.opencontainers.image.licenses="Apache-2.0" \
    org.opencontainers.image.ref.name="airflow-ci-image" \
    org.opencontainers.image.title="Continuous Integration Airflow Image" \
    org.opencontainers.image.description="Installed Apache Airflow with Continuous Integration dependencies"

ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
CMD []