blob: c4921b7df90c92bde40cf10c7e3e4da1b1624449 [file] [log] [blame]
# syntax=docker/dockerfile:1.4
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT.
#
# Base image of the stage below; it can be overridden at build time with --build-arg BASE_IMAGE=...
ARG BASE_IMAGE="debian:bookworm-slim"
##############################################################################################
# This is the script image where we keep all inlined bash scripts needed in other segments
# We use BASE_IMAGE to make sure that the scripts are different for different platforms.
##############################################################################################
# The AS keyword is upper-case to match FROM (BuildKit build check "FromAsCasing").
FROM ${BASE_IMAGE} AS scripts
##############################################################################################
# Please DO NOT modify the inlined scripts manually. The content of those files will be
# replaced by prek automatically from the "scripts/docker/" folder.
# This is done in order to avoid problems with caching and file permissions and in order to
# make the PROD Dockerfile standalone
##############################################################################################
# The content below is automatically copied from scripts/docker/install_os_dependencies.sh
COPY <<"EOF" /install_os_dependencies.sh
#!/usr/bin/env bash
set -euo pipefail

# Exactly one positional argument selects the installation flavour.
if [[ $# -ne 1 ]]; then
    echo
    echo "ERROR! There should be 'runtime', 'ci' or 'dev' parameter passed as argument.".
    echo
    exit 1
fi

# Tool versions can be supplied through the environment; these are the fallbacks.
AIRFLOW_PYTHON_VERSION=${AIRFLOW_PYTHON_VERSION:-3.10.18}
GOLANG_MAJOR_MINOR_VERSION=${GOLANG_MAJOR_MINOR_VERSION:-1.24.4}

# Translate the argument into the INSTALLATION_TYPE used by the dispatch at the end of the script.
case "${1}" in
    runtime)
        INSTALLATION_TYPE="RUNTIME"
        ;;
    dev)
        INSTALLATION_TYPE="DEV"
        ;;
    ci)
        INSTALLATION_TYPE="CI"
        ;;
    *)
        echo
        echo "ERROR! Wrong argument. Passed ${1} and it should be one of 'runtime', 'ci' or 'dev'.".
        echo
        exit 1
        ;;
esac
# Populates and exports DEV_APT_DEPS with the default list of build-time apt packages.
# A non-empty DEV_APT_DEPS supplied by the caller is left untouched.
# The value is a space-separated package list meant to be word-split by the caller.
function get_dev_apt_deps() {
    if [[ "${DEV_APT_DEPS=}" == "" ]]; then
        # One package per line, sorted alphabetically, each listed exactly once.
        local -a dev_packages=(
            apt-transport-https
            apt-utils
            build-essential
            dirmngr
            freetds-bin
            freetds-dev
            git
            graphviz
            graphviz-dev
            krb5-user
            lcov
            ldap-utils
            libbluetooth-dev
            libbz2-dev
            libc6-dev
            libdb-dev
            libev-dev
            libev4
            libffi-dev
            libgdbm-compat-dev
            libgdbm-dev
            libgeos-dev
            libkrb5-dev
            libldap2-dev
            libleveldb-dev
            libleveldb1d
            liblzma-dev
            libncurses5-dev
            libreadline6-dev
            libsasl2-2
            libsasl2-dev
            libsasl2-modules
            libsqlite3-dev
            libssl-dev
            libxmlsec1
            libxmlsec1-dev
            libzstd-dev
            locales
            lsb-release
            lzma
            lzma-dev
            openssh-client
            openssl
            pkg-config
            pkgconf
            sasl2-bin
            sqlite3
            sudo
            tk-dev
            unixodbc
            unixodbc-dev
            uuid-dev
            wget
            xz-utils
            zlib1g-dev
        )
        # "${array[*]}" joins the elements with the first character of IFS (a space by default).
        DEV_APT_DEPS="${dev_packages[*]}"
        export DEV_APT_DEPS
    fi
}
# Populates and exports RUNTIME_APT_DEPS with the default list of runtime apt packages.
# A non-empty RUNTIME_APT_DEPS supplied by the caller is left untouched.
# The Debian codename is read from /etc/os-release and printed for information only;
# the version-specific package list below does not depend on it.
function get_runtime_apt_deps() {
local debian_version
local debian_version_apt_deps
# Get debian version without installing lsb_release
# shellcheck disable=SC1091
debian_version=$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME";)
echo
echo "DEBIAN CODENAME: ${debian_version}"
echo
# Packages whose names carry a library version; they are prepended to the default list below.
debian_version_apt_deps="\
libffi8 \
libldap-2.5-0 \
libssl3 \
netcat-openbsd\
"
echo
echo "APPLIED INSTALLATION CONFIGURATION FOR DEBIAN VERSION: ${debian_version}"
echo
# "${VAR=}" assigns an empty value when the variable is unset, so the test is safe under "set -u".
if [[ "${RUNTIME_APT_DEPS=}" == "" ]]; then
RUNTIME_APT_DEPS="\
${debian_version_apt_deps} \
apt-transport-https \
apt-utils \
curl \
dumb-init \
freetds-bin \
git \
gnupg \
iputils-ping \
krb5-user \
ldap-utils \
libev4 \
libgeos-dev \
libsasl2-2 \
libsasl2-modules \
libxmlsec1 \
locales \
lsb-release \
openssh-client \
rsync \
sasl2-bin \
sqlite3 \
sudo \
unixodbc \
wget\
"
export RUNTIME_APT_DEPS
fi
}
# Installs the Docker CLI (docker-ce-cli) from Docker's own apt repository:
# fetches the repository signing key, registers the repository for the current
# architecture and Debian codename, then installs the package.
function install_docker_cli() {
    local docker_arch
    local docker_codename
    apt-get update
    # -y is required in a non-interactive build: without it apt-get aborts at the confirmation
    # prompt whenever it has anything extra to install.
    apt-get install -y --no-install-recommends ca-certificates curl
    install -m 0755 -d /etc/apt/keyrings
    curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc
    chmod a+r /etc/apt/keyrings/docker.asc
    # Resolve both values up-front so that a failure stops the script instead of silently
    # producing a malformed sources list entry.
    docker_arch="$(dpkg --print-architecture)"
    # shellcheck disable=SC1091
    docker_codename="$(. /etc/os-release && echo "$VERSION_CODENAME")"
    echo "deb [arch=${docker_arch} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian ${docker_codename} stable" \
        > /etc/apt/sources.list.d/docker.list
    apt-get update
    apt-get install -y --no-install-recommends docker-ce-cli
}
# Installs the development apt packages listed in DEV_APT_DEPS.
# Before that it exports the assignments held in ADDITIONAL_DEV_APT_ENV and runs the optional
# DEV_APT_COMMAND / ADDITIONAL_DEV_APT_COMMAND hooks in a strict bash sub-process.
function install_debian_dev_dependencies() {
    local debian_version
    local debian_version_apt_deps
    # Invocation shared by both hook commands.
    local -a strict_bash=(bash -o pipefail -o errexit -o nounset -o nolog -c)

    apt-get update
    apt-get install -yqq --no-install-recommends apt-utils &>/dev/null
    apt-get install -y --no-install-recommends \
        wget curl gnupg2 lsb-release ca-certificates

    # shellcheck disable=SC2086
    export ${ADDITIONAL_DEV_APT_ENV?}

    if [[ -n "${DEV_APT_COMMAND}" ]]; then
        "${strict_bash[@]}" "${DEV_APT_COMMAND}"
    fi
    if [[ -n "${ADDITIONAL_DEV_APT_COMMAND}" ]]; then
        "${strict_bash[@]}" "${ADDITIONAL_DEV_APT_COMMAND}"
    fi

    # The hooks may have registered new repositories, so refresh the package index.
    apt-get update

    # Get debian version without installing lsb_release
    # shellcheck disable=SC1091
    debian_version=$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME";)
    printf '\nDEBIAN CODENAME: %s\n\n' "${debian_version}"

    # shellcheck disable=SC2086
    apt-get install -y --no-install-recommends ${DEV_APT_DEPS}
}
# Installs the extra development apt packages named in ADDITIONAL_DEV_APT_DEPS.
# Does nothing when the variable is unset or empty.
function install_additional_dev_dependencies() {
    if [[ -z "${ADDITIONAL_DEV_APT_DEPS=}" ]]; then
        return 0
    fi
    # The list is deliberately unquoted so that it is split into individual package names.
    # shellcheck disable=SC2086
    apt-get install -y --no-install-recommends ${ADDITIONAL_DEV_APT_DEPS}
}
# Exposes the Python installation from /usr/python through /usr/local.
#  * /usr/local/bin gets links to pip3, python3 and python3-config, for tools that expect
#    python to live there.
#  * Both /usr/local/bin and /usr/python/bin get suffix-less aliases (pip, python,
#    python-config) unless an entry of that name already exists. /usr/python/bin is meant
#    to come first in PATH so that tools detecting the home of the python installation
#    (including lib/site-packages) find it.
#  * Every entry of /usr/python/lib is linked into /usr/local/lib, replacing an existing entry.
function link_python() {
    for dst in pip3 python3 python3-config
    do
        # Drop every "3" from the name: pip3 -> pip, python3-config -> python-config.
        src="${dst//3/}"
        echo "Linking ${dst} in /usr/local/bin and /usr/python/bin"
        ln -sv "/usr/python/bin/${dst}" "/usr/local/bin/${dst}"
        for dir in /usr/local/bin /usr/python/bin
        do
            if [[ -e "${dir}/${src}" ]]; then
                continue
            fi
            echo "Creating ${src} - > ${dst} link in ${dir}"
            ln -sv "${dir}/${dst}" "${dir}/${src}"
        done
    done
    for dst in /usr/python/lib/*; do
        src="/usr/local/lib/${dst##*/}"
        if [[ -e "${src}" ]]; then
            rm -rf "${src}"
        fi
        echo "Linking ${dst} to ${src}"
        ln -sv "${dst}" "${src}"
    done
    # Refresh the dynamic linker cache after the library links were created.
    ldconfig
}
# Installs the runtime apt packages listed in RUNTIME_APT_DEPS and ADDITIONAL_RUNTIME_APT_DEPS.
# Before that it exports the assignments held in ADDITIONAL_RUNTIME_APT_ENV and runs the optional
# RUNTIME_APT_COMMAND / ADDITIONAL_RUNTIME_APT_COMMAND hooks in a strict bash sub-process.
# Afterwards it cleans apt leftovers, links python and removes apt lists and logs.
function install_debian_runtime_dependencies() {
    # Invocation shared by both hook commands.
    local -a strict_bash=(bash -o pipefail -o errexit -o nounset -o nolog -c)

    apt-get update
    apt-get install --no-install-recommends -yqq apt-utils &>/dev/null
    apt-get install -y --no-install-recommends \
        wget curl gnupg2 lsb-release ca-certificates

    # shellcheck disable=SC2086
    export ${ADDITIONAL_RUNTIME_APT_ENV?}

    if [[ -n "${RUNTIME_APT_COMMAND}" ]]; then
        "${strict_bash[@]}" "${RUNTIME_APT_COMMAND}"
    fi
    if [[ -n "${ADDITIONAL_RUNTIME_APT_COMMAND}" ]]; then
        "${strict_bash[@]}" "${ADDITIONAL_RUNTIME_APT_COMMAND}"
    fi

    # The hooks may have registered new repositories, so refresh the package index.
    apt-get update
    # shellcheck disable=SC2086
    apt-get install -y --no-install-recommends \
        ${RUNTIME_APT_DEPS} ${ADDITIONAL_RUNTIME_APT_DEPS}

    apt-get autoremove -yqq --purge
    apt-get clean
    link_python
    rm -rf /var/lib/apt/lists/* /var/log/*
}
# Downloads, verifies, builds and installs CPython ${AIRFLOW_PYTHON_VERSION} into /usr/python,
# then links it into /usr/local via link_python.
# The script exits with status 1 when a system libpython package is already installed.
function install_python() {
    # If system python (3.11 in bookworm) is installed (via automatic installation of some dependencies for example), we need
    # to fail and make sure that it is not there, because there can be strange interactions if we install
    # newer version and system libraries are installed, because
    # when you create a virtualenv part of the shared libraries of Python can be taken from the system
    # Installation leading to weird errors when you want to install some modules - for example when you install ssl:
    # /usr/python/lib/python3.11/lib-dynload/_ssl.cpython-311-aarch64-linux-gnu.so: undefined symbol: _PyModule_Add
    #
    # "dpkg -l" separates the status column from the package name with more than one space,
    # so the pattern accepts any run of whitespace after "ii".
    if dpkg -l | grep -E '^ii[[:space:]]+libpython' >/dev/null; then
        echo
        echo "ERROR! System python is installed by one of the previous steps"
        echo
        echo "Please make sure that no python packages are installed by default. Displaying the reason why libpython3.11 is installed:"
        echo
        apt-get install -yqq aptitude >/dev/null
        aptitude why libpython3.11
        echo
        exit 1
    else
        echo
        echo "GOOD! System python is not installed - OK"
        echo
    fi
    # "%%[a-z]*" strips everything from the first lowercase letter on, giving the download directory name.
    wget -O python.tar.xz "https://www.python.org/ftp/python/${AIRFLOW_PYTHON_VERSION%%[a-z]*}/Python-${AIRFLOW_PYTHON_VERSION}.tar.xz"
    wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${AIRFLOW_PYTHON_VERSION%%[a-z]*}/Python-${AIRFLOW_PYTHON_VERSION}.tar.xz.asc";
    # Signing key fingerprint per Python major.minor version.
    declare -A keys=(
        # gpg: key B26995E310250568: public key "\xc5\x81ukasz Langa (GPG langa.pl) <lukasz@langa.pl>" imported
        # https://peps.python.org/pep-0596/#release-manager-and-crew
        [3.9]="E3FF2839C048B25C084DEBE9B26995E310250568"
        # gpg: key 64E628F8D684696D: public key "Pablo Galindo Salgado <pablogsal@gmail.com>" imported
        # https://peps.python.org/pep-0619/#release-manager-and-crew
        [3.10]="A035C8C19219BA821ECEA86B64E628F8D684696D"
        # gpg: key 64E628F8D684696D: public key "Pablo Galindo Salgado <pablogsal@gmail.com>" imported
        # https://peps.python.org/pep-0664/#release-manager-and-crew
        [3.11]="A035C8C19219BA821ECEA86B64E628F8D684696D"
        # gpg: key A821E680E5FA6305: public key "Thomas Wouters <thomas@python.org>" imported
        # https://peps.python.org/pep-0693/#release-manager-and-crew
        [3.12]="7169605F62C751356D054A26A821E680E5FA6305"
        # gpg: key A821E680E5FA6305: public key "Thomas Wouters <thomas@python.org>" imported
        # https://peps.python.org/pep-0719/#release-manager-and-crew
        [3.13]="7169605F62C751356D054A26A821E680E5FA6305"
    )
    # "%.*" drops the last dot-separated component: 3.10.18 -> 3.10.
    major_minor_version="${AIRFLOW_PYTHON_VERSION%.*}"
    echo "Verifying Python ${AIRFLOW_PYTHON_VERSION} (${major_minor_version})"
    # Use a throw-away keyring for the verification.
    GNUPGHOME="$(mktemp -d)"; export GNUPGHOME;
    gpg_key="${keys[${major_minor_version}]}"
    echo "Using GPG key ${gpg_key}"
    gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "${gpg_key}"
    gpg --batch --verify python.tar.xz.asc python.tar.xz;
    gpgconf --kill all
    rm -rf "$GNUPGHOME" python.tar.xz.asc
    mkdir -p /usr/src/python
    tar --extract --directory /usr/src/python --strip-components=1 --file python.tar.xz
    rm python.tar.xz
    cd /usr/src/python
    arch="$(dpkg --print-architecture)"; arch="${arch##*-}"
    gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)"
    EXTRA_CFLAGS="$(dpkg-buildflags --get CFLAGS)"
    EXTRA_CFLAGS="${EXTRA_CFLAGS:-} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer";
    LDFLAGS="$(dpkg-buildflags --get LDFLAGS)"
    LDFLAGS="${LDFLAGS:--Wl},--strip-all"
    ./configure --enable-optimizations --prefix=/usr/python/ --with-ensurepip --build="$gnuArch" \
        --enable-loadable-sqlite-extensions --enable-option-checking=fatal \
        --enable-shared --with-lto
    make -s -j "$(nproc)" "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \
        "LDFLAGS=${LDFLAGS:--Wl},-rpath='\$\$ORIGIN/../lib'" python
    make -s -j "$(nproc)" install
    cd /
    rm -rf /usr/src/python
    # Remove test directories and static libpython archives from the installation.
    find /usr/python -depth \
        \( \
        \( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \
        -o \( -type f -a \( -name 'libpython*.a' \) \) \
        \) -exec rm -rf '{}' +
    link_python
}
# Downloads Go ${GOLANG_MAJOR_MINOR_VERSION} for the current dpkg architecture and unpacks it
# into /usr/local/go, replacing any previous installation there.
function install_golang() {
    local go_archive="go${GOLANG_MAJOR_MINOR_VERSION}.linux.tar.gz"
    # -f makes curl fail on HTTP errors instead of saving the error page as the archive,
    # -L follows redirects, -sS hides the progress meter but still reports errors.
    curl -fsSL "https://dl.google.com/go/go${GOLANG_MAJOR_MINOR_VERSION}.linux-$(dpkg --print-architecture).tar.gz" -o "${go_archive}"
    rm -rf /usr/local/go
    tar -C /usr/local -xzf "${go_archive}"
    # The downloaded archive is not needed once it has been extracted.
    rm -f "${go_archive}"
}
# Purges automatically installed packages that are no longer needed and removes
# the apt package lists and everything under /var/log.
function apt_clean() {
    apt-get purge \
        -y \
        --auto-remove \
        -o APT::AutoRemove::RecommendsImportant=false
    rm -rf \
        /var/lib/apt/lists/* \
        /var/log/*
}
# Entry point: run the installation sequence that matches INSTALLATION_TYPE.
case "${INSTALLATION_TYPE}" in
    RUNTIME)
        get_runtime_apt_deps
        install_debian_runtime_dependencies
        install_docker_cli
        apt_clean
        ;;
    *)
        # DEV and CI share the same sequence; only CI additionally installs golang.
        get_dev_apt_deps
        install_debian_dev_dependencies
        install_python
        install_additional_dev_dependencies
        if [[ "${INSTALLATION_TYPE}" == "CI" ]]; then
            install_golang
        fi
        install_docker_cli
        apt_clean
        ;;
esac
EOF
# The content below is automatically copied from scripts/docker/install_mysql.sh
COPY <<"EOF" /install_mysql.sh
#!/usr/bin/env bash
# Installs a MySQL or MariaDB client, depending on INSTALL_MYSQL_CLIENT_TYPE.
# INSTALL_MYSQL_CLIENT must be set by the caller; INSTALL_MYSQL_CLIENT_TYPE defaults to "mariadb".
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
set -euo pipefail
common::get_colors
# Package list filled in by the install_* functions below.
declare -a packages
readonly MYSQL_LTS_VERSION="8.0"
readonly MARIADB_LTS_VERSION="10.11"
: "${INSTALL_MYSQL_CLIENT:?Should be true or false}"
# ":=" assigns the default. ":-" would only expand it and leave the variable unset,
# making the later read of INSTALL_MYSQL_CLIENT_TYPE fail under "set -u".
: "${INSTALL_MYSQL_CLIENT_TYPE:=mariadb}"
# Runs the given command until it succeeds, at most three times, sleeping ten seconds
# between attempts. Returns 0 on success, otherwise the exit code of the last attempt.
retry() {
    local max_attempts=3
    local attempt=0
    local pause_seconds=10
    while true; do
        if "$@"; then
            return 0
        else
            # Inside the else branch $? still holds the status of the failed command.
            exit_code=$?
        fi
        attempt=$((attempt + 1))
        if [[ $attempt -ge $max_attempts ]]; then
            echo "Command failed after $max_attempts attempts."
            return $exit_code
        fi
        echo "Command failed. Attempt $attempt/$max_attempts. Retrying in ${pause_seconds}s..."
        sleep $pause_seconds
    done
}
# Installs the Oracle MySQL client packages from repo.mysql.com.
# $1 - "dev" (client plus development headers) or "prod" (client plus runtime library).
# Any other value prints an error and exits with status 1.
install_mysql_client() {
    case "${1}" in
        dev)
            packages=("libmysqlclient-dev" "mysql-client")
            ;;
        prod)
            # `libmysqlclientXX` where XX is number, and it should be increased every new GA MySQL release, for example
            # 18 - MySQL 5.6.48
            # 20 - MySQL 5.7.42
            # 21 - MySQL 8.0.34
            # 22 - MySQL 8.1
            packages=("libmysqlclient21" "mysql-client")
            ;;
        *)
            echo
            echo "${COLOR_RED}Specify either prod or dev${COLOR_RESET}"
            echo
            exit 1
            ;;
    esac

    local mysql_sources_list="/etc/apt/sources.list.d/mysql.list"

    common::import_trusted_gpg "B7B3B788A8D3785C" "mysql"
    echo
    echo "${COLOR_BLUE}Installing Oracle MySQL client version ${MYSQL_LTS_VERSION}: ${1}${COLOR_RESET}"
    echo
    echo "deb http://repo.mysql.com/apt/debian/ $(lsb_release -cs) mysql-${MYSQL_LTS_VERSION}" > "${mysql_sources_list}"
    retry apt-get update
    retry apt-get install --no-install-recommends -y "${packages[@]}"
    apt-get autoremove -yqq --purge
    apt-get clean && rm -rf /var/lib/apt/lists/*
    # Remove mysql repository from sources.list.d as MySQL repos have a basic flaw that they put expiry
    # date on their GPG signing keys and they sign their repo with those keys. This means that after a
    # certain date, the GPG key becomes invalid and if you have the repository added in your sources.list
    # then you will not be able to install anything from any other repository. This is unlike any other
    # repository we have seen (for example Postgres, MariaDB, MsSQL - all have non-expiring signing keys)
    rm "${mysql_sources_list}"
}
# Installs the MariaDB client packages from the MariaDB archive repository.
# $1 - "dev" (client plus development headers) or "prod" (client plus runtime library).
# Any other value prints an error and exits with status 1.
#
# List of compatible package Oracle MySQL -> MariaDB:
# `mysql-client` -> `mariadb-client` or `mariadb-client-compat` (11+)
# `libmysqlclientXX` (where XX is a number) -> `libmariadb3-compat`
# `libmysqlclient-dev` -> `libmariadb-dev-compat`
#
# Package names in the MariaDB repo differ from the Debian repo used before:
# some of them carry a `-compat` suffix, Debian repo -> MariaDB repo:
# `libmariadb-dev` -> `libmariadb-dev-compat`
# `mariadb-client-core` -> `mariadb-client` or `mariadb-client-compat` (11+)
install_mariadb_client() {
    case "${1}" in
        dev)
            packages=("libmariadb-dev-compat" "mariadb-client")
            ;;
        prod)
            packages=("libmariadb3-compat" "mariadb-client")
            ;;
        *)
            echo
            echo "${COLOR_RED}Specify either prod or dev${COLOR_RESET}"
            echo
            exit 1
            ;;
    esac

    common::import_trusted_gpg "0xF1656F24C74CD1D8" "mariadb"
    echo
    echo "${COLOR_BLUE}Installing MariaDB client version ${MARIADB_LTS_VERSION}: ${1}${COLOR_RESET}"
    echo "${COLOR_YELLOW}MariaDB client protocol-compatible with MySQL client.${COLOR_RESET}"
    echo
    echo "deb [arch=amd64,arm64] https://archive.mariadb.org/mariadb-${MARIADB_LTS_VERSION}/repo/debian/ $(lsb_release -cs) main" \
        > /etc/apt/sources.list.d/mariadb.list
    # Make sure that dependencies from MariaDB repo are preferred over Debian dependencies
    printf "Package: *\nPin: release o=MariaDB\nPin-Priority: 999\n" \
        > /etc/apt/preferences.d/mariadb
    retry apt-get update
    retry apt-get install --no-install-recommends -y "${packages[@]}"
    apt-get autoremove -yqq --purge
    apt-get clean && rm -rf /var/lib/apt/lists/*
}
# Entry point: install a client only when INSTALL_MYSQL_CLIENT is "true".
if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then
    # On ARM machines the MariaDB client is always used, whatever was requested.
    case "$(uname -m)" in
        arm64 | aarch64)
            INSTALL_MYSQL_CLIENT_TYPE="mariadb"
            echo
            echo "${COLOR_YELLOW}Client forced to mariadb for ARM${COLOR_RESET}"
            echo
            ;;
    esac
    case "${INSTALL_MYSQL_CLIENT_TYPE}" in
        mysql)
            install_mysql_client "${@}"
            ;;
        mariadb)
            install_mariadb_client "${@}"
            ;;
        *)
            echo
            echo "${COLOR_RED}Specify either mysql or mariadb, got ${INSTALL_MYSQL_CLIENT_TYPE}${COLOR_RESET}"
            echo
            exit 1
            ;;
    esac
fi
EOF
# The content below is automatically copied from scripts/docker/install_mssql.sh
COPY <<"EOF" /install_mssql.sh
#!/usr/bin/env bash
# Installs the Microsoft ODBC driver (msodbcsql18) when INSTALL_MSSQL_CLIENT is "true".
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
set -euo pipefail
common::get_colors
declare -a packages
: "${INSTALL_MSSQL_CLIENT:?Should be true or false}"

# Install MsSQL client from Microsoft repositories
function install_mssql_client() {
    if [[ ${INSTALL_MSSQL_CLIENT:="true"} != "true" ]]; then
        echo
        echo "${COLOR_BLUE}Skip installing mssql client${COLOR_RESET}"
        echo
        return 0
    fi

    packages=("msodbcsql18")
    common::import_trusted_gpg "EB3E94ADBE1229CF" "microsoft"

    echo
    echo "${COLOR_BLUE}Installing mssql client${COLOR_RESET}"
    echo

    # Register the repository, create the ACCEPT_EULA marker file, then install and clean up
    # as a single chained command list.
    echo "deb [arch=amd64,arm64] https://packages.microsoft.com/debian/$(lsb_release -rs)/prod $(lsb_release -cs) main" \
        > /etc/apt/sources.list.d/mssql-release.list \
        && mkdir -p /opt/microsoft/msodbcsql18 \
        && touch /opt/microsoft/msodbcsql18/ACCEPT_EULA \
        && apt-get update -yqq \
        && apt-get upgrade -yqq \
        && apt-get -yqq install --no-install-recommends "${packages[@]}" \
        && apt-get autoremove -yqq --purge \
        && apt-get clean \
        && rm -rf /var/lib/apt/lists/*
}

install_mssql_client "${@}"
EOF
# The content below is automatically copied from scripts/docker/install_postgres.sh
COPY <<"EOF" /install_postgres.sh
#!/usr/bin/env bash
# Installs the PostgreSQL client from the apt.postgresql.org repository
# when INSTALL_POSTGRES_CLIENT is "true".
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
set -euo pipefail
common::get_colors
declare -a packages
: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}"

# $1 - "dev" (client plus libpq-dev) or "prod" (client only).
# Any other value prints an error and exits with status 1.
install_postgres_client() {
    echo
    echo "${COLOR_BLUE}Installing postgres client${COLOR_RESET}"
    echo

    case "${1}" in
        dev)
            packages=("libpq-dev" "postgresql-client")
            ;;
        prod)
            packages=("postgresql-client")
            ;;
        *)
            echo
            echo "Specify either prod or dev"
            echo
            exit 1
            ;;
    esac

    common::import_trusted_gpg "7FCC7D46ACCC4CF8" "postgres"
    echo "deb [arch=amd64,arm64] https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list
    apt-get update
    apt-get install --no-install-recommends -y "${packages[@]}"
    apt-get autoremove -yqq --purge
    apt-get clean && rm -rf /var/lib/apt/lists/*
}

if [[ ${INSTALL_POSTGRES_CLIENT:="true"} == "true" ]]; then
    install_postgres_client "${@}"
fi
EOF
# The content below is automatically copied from scripts/docker/install_packaging_tools.sh
COPY <<"EOF" /install_packaging_tools.sh
#!/usr/bin/env bash
# Thin driver around the helpers from common.sh, which is loaded from the directory
# this script lives in.
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
# Define and export the COLOR_* variables used in the messages.
common::get_colors
# Decide between 'uv' and 'pip' (requires AIRFLOW_USE_UV) and export the matching flags.
common::get_packaging_tool
# Print PATH and the pip / uv versions and locations.
common::show_packaging_tool_version_and_location
# Install or upgrade pip and uv, then install prek through "uv tool install".
common::install_packaging_tools
EOF
# The content below is automatically copied from scripts/docker/common.sh
COPY <<"EOF" /common.sh
#!/usr/bin/env bash
set -euo pipefail

# Defines and exports the ANSI escape sequences used to colour script messages.
function common::get_colors() {
    export COLOR_BLUE=$'\e[34m'
    export COLOR_GREEN=$'\e[32m'
    export COLOR_RED=$'\e[31m'
    export COLOR_RESET=$'\e[0m'
    export COLOR_YELLOW=$'\e[33m'
}
# Selects the packaging tool according to AIRFLOW_USE_UV (which must be set) and exports
# PACKAGING_TOOL, PACKAGING_TOOL_CMD, EXTRA_INSTALL_FLAGS, EXTRA_UNINSTALL_FLAGS,
# UPGRADE_TO_HIGHEST_RESOLUTION and UPGRADE_IF_NEEDED. For 'uv' it also exports
# UV_CONCURRENT_DOWNLOADS, set to the number of processors reported by "nproc --all".
#
# --no-binary is needed in order to avoid libxml and xmlsec using different version of libxml2
# (binary lxml embeds its own libxml2, while xmlsec uses system one).
# See https://bugs.launchpad.net/lxml/+bug/2110068
function common::get_packaging_tool() {
    : "${AIRFLOW_USE_UV:?Should be set}"
    ## IMPORTANT: IF YOU MODIFY THIS FUNCTION YOU SHOULD ALSO MODIFY CORRESPONDING FUNCTION IN
    ## `scripts/in_container/_in_container_utils.sh`
    if [[ ${AIRFLOW_USE_UV} != "true" ]]; then
        echo
        echo "${COLOR_BLUE}Using 'pip' to install Airflow${COLOR_RESET}"
        echo
        PACKAGING_TOOL="pip"
        PACKAGING_TOOL_CMD="pip"
        EXTRA_INSTALL_FLAGS="--root-user-action ignore --no-binary lxml,xmlsec"
        EXTRA_UNINSTALL_FLAGS="--yes"
        UPGRADE_TO_HIGHEST_RESOLUTION="--upgrade --upgrade-strategy eager"
        UPGRADE_IF_NEEDED="--upgrade --upgrade-strategy only-if-needed"
        if [[ ${INCLUDE_PRE_RELEASE=} == "true" ]]; then
            EXTRA_INSTALL_FLAGS="${EXTRA_INSTALL_FLAGS} --pre"
        fi
        export PACKAGING_TOOL PACKAGING_TOOL_CMD EXTRA_INSTALL_FLAGS EXTRA_UNINSTALL_FLAGS
        export UPGRADE_TO_HIGHEST_RESOLUTION UPGRADE_IF_NEEDED
        return 0
    fi

    echo
    echo "${COLOR_BLUE}Using 'uv' to install Airflow${COLOR_RESET}"
    echo
    PACKAGING_TOOL="uv"
    PACKAGING_TOOL_CMD="uv pip"
    EXTRA_INSTALL_FLAGS="--no-binary lxml --no-binary xmlsec"
    if [[ ${AIRFLOW_INSTALLATION_METHOD=} == "." && -f "./pyproject.toml" ]]; then
        # for uv only install dev group when we install from sources
        EXTRA_INSTALL_FLAGS="--group=dev ${EXTRA_INSTALL_FLAGS}"
    fi
    EXTRA_UNINSTALL_FLAGS=""
    UPGRADE_TO_HIGHEST_RESOLUTION="--upgrade --resolution highest"
    UPGRADE_IF_NEEDED="--upgrade"
    UV_CONCURRENT_DOWNLOADS=$(nproc --all)
    if [[ ${INCLUDE_PRE_RELEASE=} == "true" ]]; then
        EXTRA_INSTALL_FLAGS="${EXTRA_INSTALL_FLAGS} --prerelease if-necessary"
    fi
    export PACKAGING_TOOL PACKAGING_TOOL_CMD EXTRA_INSTALL_FLAGS EXTRA_UNINSTALL_FLAGS
    export UPGRADE_TO_HIGHEST_RESOLUTION UPGRADE_IF_NEEDED UV_CONCURRENT_DOWNLOADS
}
# Derives AIRFLOW_VERSION_SPECIFICATION ("==<version>") from AIRFLOW_VERSION.
# Nothing is changed when a specification is already set, when AIRFLOW_VERSION is empty,
# or when AIRFLOW_INSTALLATION_METHOD is ".".
function common::get_airflow_version_specification() {
    if [[ -n ${AIRFLOW_VERSION_SPECIFICATION=} ]]; then
        return 0
    fi
    if [[ -z ${AIRFLOW_VERSION} ]]; then
        return 0
    fi
    if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then
        return 0
    fi
    AIRFLOW_VERSION_SPECIFICATION="==${AIRFLOW_VERSION}"
}
# Resolves the constraints file to use and places a copy of it at ${HOME}/constraints.txt.
# AIRFLOW_CONSTRAINTS_REFERENCE and AIRFLOW_CONSTRAINTS_LOCATION are only computed when empty;
# values supplied by the caller are used as they are.
# Reads AIRFLOW_VERSION, CONSTRAINTS_GITHUB_REPOSITORY and AIRFLOW_CONSTRAINTS_MODE, plus
# DEFAULT_CONSTRAINTS_BRANCH when the version does not match the patterns below.
function common::get_constraints_location() {
# auto-detect Airflow-constraint reference and location
if [[ -z "${AIRFLOW_CONSTRAINTS_REFERENCE=}" ]]; then
# NOTE(review): the patterns are not anchored, so they match any version string that
# contains a "2" or a "3" - confirm that this is intended.
if [[ ${AIRFLOW_VERSION} =~ v?2.* || ${AIRFLOW_VERSION} =~ v?3.* ]]; then
AIRFLOW_CONSTRAINTS_REFERENCE=constraints-${AIRFLOW_VERSION}
else
AIRFLOW_CONSTRAINTS_REFERENCE=${DEFAULT_CONSTRAINTS_BRANCH}
fi
fi
if [[ -z ${AIRFLOW_CONSTRAINTS_LOCATION=} ]]; then
local constraints_base="https://raw.githubusercontent.com/${CONSTRAINTS_GITHUB_REPOSITORY}/${AIRFLOW_CONSTRAINTS_REFERENCE}"
local python_version
# major.minor of the interpreter found as "python" on PATH
python_version=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
AIRFLOW_CONSTRAINTS_LOCATION="${constraints_base}/${AIRFLOW_CONSTRAINTS_MODE}-${python_version}.txt"
fi
# A location containing "http" is downloaded with curl; anything else is copied as a local file.
if [[ ${AIRFLOW_CONSTRAINTS_LOCATION} =~ http.* ]]; then
echo
echo "${COLOR_BLUE}Downloading constraints from ${AIRFLOW_CONSTRAINTS_LOCATION} to ${HOME}/constraints.txt ${COLOR_RESET}"
echo
curl -sSf -o "${HOME}/constraints.txt" "${AIRFLOW_CONSTRAINTS_LOCATION}"
else
echo
echo "${COLOR_BLUE}Copying constraints from ${AIRFLOW_CONSTRAINTS_LOCATION} to ${HOME}/constraints.txt ${COLOR_RESET}"
echo
cp "${AIRFLOW_CONSTRAINTS_LOCATION}" "${HOME}/constraints.txt"
fi
}
# Prints PATH, the pip version and location and - unless PACKAGING_TOOL is "pip" -
# the uv version and location ("Not installed yet" when uv cannot be run).
function common::show_packaging_tool_version_and_location() {
    printf '%s\n' "PATH=${PATH}"
    printf '%s\n' "Installed pip: $(pip --version): $(which pip)"
    case "${PACKAGING_TOOL}" in
        pip)
            printf '%s\n' "${COLOR_BLUE}Using 'pip' to install Airflow${COLOR_RESET}"
            ;;
        *)
            printf '%s\n' "${COLOR_BLUE}Using 'uv' to install Airflow${COLOR_RESET}"
            printf '%s\n' "Installed uv: $(uv --version 2>/dev/null || echo "Not installed yet"): $(which uv 2>/dev/null)"
            ;;
    esac
}
# Installs or upgrades pip and uv with pip, then installs prek (together with uv)
# through "uv tool install". AIRFLOW_USE_UV must be set.
#
# AIRFLOW_PIP_VERSION / AIRFLOW_UV_VERSION are interpreted as follows:
#   * empty                  - upgrade to the latest version
#   * not starting a digit   - treated as a direct reference and installed as "<name> @ <value>"
#   * starting with a digit  - that exact version is (re)installed unless already present
# AIRFLOW_PREK_VERSION: empty installs the latest prek, otherwise that exact version.
function common::install_packaging_tools() {
    : "${AIRFLOW_USE_UV:?Should be set}"
    if [[ "${VIRTUAL_ENV=}" != "" ]]; then
        echo
        echo "${COLOR_BLUE}Checking packaging tools in venv: ${VIRTUAL_ENV}${COLOR_RESET}"
        echo
    else
        echo
        echo "${COLOR_BLUE}Checking packaging tools for system Python installation: $(which python)${COLOR_RESET}"
        echo
    fi

    # --- pip ---
    if [[ ${AIRFLOW_PIP_VERSION=} == "" ]]; then
        echo
        echo "${COLOR_BLUE}Installing latest pip version${COLOR_RESET}"
        echo
        pip install --root-user-action ignore --disable-pip-version-check --upgrade pip
    elif [[ ! ${AIRFLOW_PIP_VERSION} =~ ^[0-9].* ]]; then
        echo
        echo "${COLOR_BLUE}Installing pip version from spec ${AIRFLOW_PIP_VERSION}${COLOR_RESET}"
        echo
        pip install --root-user-action ignore --disable-pip-version-check "pip @ ${AIRFLOW_PIP_VERSION}"
    else
        local installed_pip_version
        installed_pip_version=$(python -c 'from importlib.metadata import version; print(version("pip"))')
        if [[ ${installed_pip_version} != "${AIRFLOW_PIP_VERSION}" ]]; then
            echo
            echo "${COLOR_BLUE}(Re)Installing pip version: ${AIRFLOW_PIP_VERSION}${COLOR_RESET}"
            echo
            pip install --root-user-action ignore --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}"
        fi
    fi

    # --- uv ---
    # uv_requirement mirrors what is installed here, so that the pinned prek installation below
    # never receives an invalid "uv==" or "uv==<url>" requirement.
    local uv_requirement
    if [[ ${AIRFLOW_UV_VERSION=} == "" ]]; then
        uv_requirement="uv"
        echo
        echo "${COLOR_BLUE}Installing latest uv version${COLOR_RESET}"
        echo
        pip install --root-user-action ignore --disable-pip-version-check --upgrade uv
    elif [[ ! ${AIRFLOW_UV_VERSION} =~ ^[0-9].* ]]; then
        uv_requirement="uv @ ${AIRFLOW_UV_VERSION}"
        echo
        echo "${COLOR_BLUE}Installing uv version from spec ${AIRFLOW_UV_VERSION}${COLOR_RESET}"
        echo
        pip install --root-user-action ignore --disable-pip-version-check "uv @ ${AIRFLOW_UV_VERSION}"
    else
        uv_requirement="uv==${AIRFLOW_UV_VERSION}"
        local installed_uv_version
        installed_uv_version=$(python -c 'from importlib.metadata import version; print(version("uv"))' 2>/dev/null || echo "Not installed yet")
        if [[ ${installed_uv_version} != "${AIRFLOW_UV_VERSION}" ]]; then
            echo
            echo "${COLOR_BLUE}(Re)Installing uv version: ${AIRFLOW_UV_VERSION}${COLOR_RESET}"
            echo
            pip install --root-user-action ignore --disable-pip-version-check "uv==${AIRFLOW_UV_VERSION}"
        fi
    fi

    # --- prek ---
    if [[ ${AIRFLOW_PREK_VERSION=} == "" ]]; then
        echo
        echo "${COLOR_BLUE}Installing latest prek, uv${COLOR_RESET}"
        echo
        uv tool install prek --with uv
    else
        echo
        echo "${COLOR_BLUE}Installing predefined versions of prek, uv:${COLOR_RESET}"
        echo "${COLOR_BLUE}prek(${AIRFLOW_PREK_VERSION}) uv(${AIRFLOW_UV_VERSION})${COLOR_RESET}"
        echo
        uv tool install "prek==${AIRFLOW_PREK_VERSION}" --with "${uv_requirement}"
    fi
    # make sure that the venv/user in .local exists
    mkdir -p "${HOME}/.local/bin"
}
# Fetches an OpenPGP public key from a keyserver and stores it as an apt trusted key.
# $1 - OpenPGP key ID
# $2 - name of the trust storage; the key is written to /etc/apt/trusted.gpg.d/<name>.gpg
# Returns 1 without writing the key file when the key cannot be received from any keyserver.
function common::import_trusted_gpg() {
    common::get_colors
    local key=${1:?${COLOR_RED}First argument expects OpenPGP Key ID${COLOR_RESET}}
    local name=${2:?${COLOR_RED}Second argument expected trust storage name${COLOR_RESET}}
    # Please note that not all servers could be used for retrieve keys
    # sks-keyservers.net: Unmaintained and DNS taken down due to GDPR requests.
    # keys.openpgp.org: User ID Mandatory, not suitable for APT repositories
    # keyring.debian.org: Only accept keys in Debian keyring.
    # pgp.mit.edu: High response time.
    local keyservers=(
        "hkps://keyserver.ubuntu.com"
        "hkps://pgp.surf.nl"
    )
    local keyserver
    local key_received="false"
    # Use a throw-away keyring so nothing is left behind in the image.
    GNUPGHOME="$(mktemp -d)"
    export GNUPGHOME
    set +e
    # Try the keyservers in random order and stop at the first one that delivers the key.
    for keyserver in $(shuf -e "${keyservers[@]}"); do
        echo "${COLOR_BLUE}Try to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}"
        if gpg --keyserver "${keyserver}" --recv-keys "${key}" 2>&1; then
            key_received="true"
            break
        fi
        echo "${COLOR_YELLOW}Unable to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}"
    done
    set -e
    if [[ ${key_received} != "true" ]]; then
        # Fail here instead of exporting a key that was never received.
        echo "${COLOR_RED}Unable to receive GPG public key ${key} from any keyserver${COLOR_RESET}"
        gpgconf --kill all || true
        rm -rf "${GNUPGHOME}"
        unset GNUPGHOME
        return 1
    fi
    gpg --export "${key}" > "/etc/apt/trusted.gpg.d/${name}.gpg"
    gpgconf --kill all
    rm -rf "${GNUPGHOME}"
    unset GNUPGHOME
}
EOF
# The content below is automatically copied from scripts/docker/install_airflow_when_building_images.sh
COPY <<"EOF" /install_airflow_when_building_images.sh
#!/usr/bin/env bash
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
# Installs Airflow and the other workspace distributions in editable mode from the current directory.
# Two modes, selected by UPGRADE_RANDOM_INDICATOR_STRING:
#   * non-empty - "uv sync" with "--resolution highest" (exits with 1 when PACKAGING_TOOL_CMD is "pip")
#   * empty     - install with ${HOME}/constraints.txt and, when that fails, fall back to
#                 "uv sync" without constraints
function install_from_sources() {
local installation_command_flags
local fallback_no_constraints_installation
fallback_no_constraints_installation="false"
local extra_sync_flags
extra_sync_flags=""
# Pass --active to "uv sync" when a virtualenv is active.
if [[ ${VIRTUAL_ENV=} != "" ]]; then
extra_sync_flags="--active"
fi
if [[ "${UPGRADE_RANDOM_INDICATOR_STRING=}" != "" ]]; then
if [[ ${PACKAGING_TOOL_CMD} == "pip" ]]; then
set +x
echo
echo "${COLOR_RED}We only support uv not pip installation for upgrading dependencies!.${COLOR_RESET}"
echo
exit 1
fi
set +x
echo
echo "${COLOR_BLUE}Attempting to upgrade all packages to highest versions.${COLOR_RESET}"
echo
# --no-binary is needed in order to avoid libxml and xmlsec using different version of libxml2
# (binary lxml embeds its own libxml2, while xmlsec uses system one).
# See https://bugs.launchpad.net/lxml/+bug/2110068
set -x
uv sync --all-packages --resolution highest --group dev --group docs --group docs-gen \
--group leveldb ${extra_sync_flags} --no-binary-package lxml --no-binary-package xmlsec \
--no-python-downloads --no-managed-python
else
# We only use uv here but Installing using constraints is not supported with `uv sync`, so we
# do not use ``uv sync`` because we are not committing and using uv.lock yet.
# Once we switch to uv.lock (with the workflow that dependabot will update it
# and constraints will be generated from it, we should be able to simply use ``uv sync`` here)
# So for now when we are installing with constraints we need to install airflow distributions first and
# separately each provider that has some extra development dependencies - otherwise `dev`
# dependency groups will not be installed because ``uv pip install --editable .`` only installs dev
# dependencies for the "top level" pyproject.toml
set +x
echo
echo
echo "${COLOR_BLUE}Installing first airflow distribution with constraints.${COLOR_RESET}"
echo
installation_command_flags=" --editable .[${AIRFLOW_EXTRAS}] \
--editable ./airflow-core --editable ./task-sdk --editable ./airflow-ctl \
--editable ./kubernetes-tests --editable ./docker-tests --editable ./helm-tests \
--editable ./task-sdk-tests \
--editable ./airflow-ctl-tests \
--editable ./devel-common[all] --editable ./dev \
--group dev --group docs --group docs-gen --group leveldb"
local -a projects_with_devel_dependencies
# Walk every pyproject.toml below "providers" (sorted, NUL-delimited): each project is added
# as an editable install, and those whose marker comment is not directly followed by "]"
# are remembered as having extra development dependencies.
while IFS= read -r -d '' pyproject_toml_file; do
project_folder=$(dirname ${pyproject_toml_file})
echo "${COLOR_BLUE}Checking provider ${project_folder} for development dependencies ${COLOR_RESET}"
first_line_of_devel_deps=$(grep -A 1 "# Additional devel dependencies (do not remove this line and add extra development dependencies)" ${project_folder}/pyproject.toml | tail -n 1)
if [[ "$first_line_of_devel_deps" != "]" ]]; then
projects_with_devel_dependencies+=("${project_folder}")
fi
installation_command_flags+=" --editable ${project_folder}"
done < <(find "providers" -name "pyproject.toml" -print0 | sort -z)
set -x
# The flag variables are intentionally unquoted so that they split into separate arguments.
if ! ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} --constraint "${HOME}/constraints.txt"; then
fallback_no_constraints_installation="true"
else
# For production image, we do not add devel dependencies in prod image
if [[ ${AIRFLOW_IMAGE_TYPE=} == "ci" ]]; then
set +x
echo
echo "${COLOR_BLUE}Installing all providers with development dependencies.${COLOR_RESET}"
echo
for project_folder in "${projects_with_devel_dependencies[@]}"; do
echo "${COLOR_BLUE}Installing provider ${project_folder} with development dependencies.${COLOR_RESET}"
set -x
if ! uv pip install --editable . --directory "${project_folder}" \
--constraint "${HOME}/constraints.txt" --group dev \
--no-python-downloads --no-managed-python; then
fallback_no_constraints_installation="true"
fi
set +x
done
fi
fi
set +x
# Any failed constrained installation above triggers one unconstrained "uv sync".
if [[ ${fallback_no_constraints_installation} == "true" ]]; then
echo
echo "${COLOR_YELLOW}Likely pyproject.toml has new dependencies conflicting with constraints.${COLOR_RESET}"
echo
echo "${COLOR_BLUE}Falling back to no-constraints installation.${COLOR_RESET}"
echo
# --no-binary is needed in order to avoid libxml and xmlsec using different version of libxml2
# (binary lxml embeds its own libxml2, while xmlsec uses system one).
# See https://bugs.launchpad.net/lxml/+bug/2110068
set -x
uv sync --all-packages --group dev --group docs --group docs-gen \
--group leveldb ${extra_sync_flags} --no-binary-package lxml --no-binary-package xmlsec \
--no-python-downloads --no-managed-python
set +x
fi
fi
}
# Installs Airflow from an external distribution specification.
#
# Only the "apache-airflow" value of AIRFLOW_INSTALLATION_METHOD is handled here; any other
# value prints an error and terminates the script with exit code 1.
#
# Reads: AIRFLOW_INSTALLATION_METHOD, AIRFLOW_EXTRAS, AIRFLOW_VERSION_SPECIFICATION,
#        UPGRADE_RANDOM_INDICATOR_STRING, PACKAGING_TOOL_CMD, EXTRA_INSTALL_FLAGS,
#        EXTRA_UNINSTALL_FLAGS, UPGRADE_TO_HIGHEST_RESOLUTION, UPGRADE_IF_NEEDED,
#        ADDITIONAL_PIP_INSTALL_FLAGS and "${HOME}/constraints.txt".
function install_from_external_spec() {
    local installation_command_flags
    if [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then
        installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}"
    else
        echo
        # Report the variable that was actually tested above (the previous message expanded an
        # undefined INSTALLATION_METHOD variable, which aborts under "set -u" or prints '').
        echo "${COLOR_RED}The '${AIRFLOW_INSTALLATION_METHOD}' installation method is not supported${COLOR_RESET}"
        echo
        echo "${COLOR_YELLOW}Supported methods are ('.', 'apache-airflow')${COLOR_RESET}"
        echo
        exit 1
    fi
    # The flag variables below are intentionally unquoted so they split into separate arguments.
    if [[ "${UPGRADE_RANDOM_INDICATOR_STRING=}" != "" ]]; then
        echo
        echo "${COLOR_BLUE}Remove airflow and all provider distributions installed before potentially${COLOR_RESET}"
        echo
        set -x
        # Best effort: failures of the uninstall pipeline are ignored.
        ${PACKAGING_TOOL_CMD} freeze | grep apache-airflow | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} 2>/dev/null || true
        set +x
        echo
        echo "${COLOR_BLUE}Installing all packages with highest resolutions. Installation method: ${AIRFLOW_INSTALLATION_METHOD}${COLOR_RESET}"
        echo
        set -x
        ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_TO_HIGHEST_RESOLUTION} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags}
        set +x
    else
        echo
        echo "${COLOR_BLUE}Installing all packages with constraints. Installation method: ${AIRFLOW_INSTALLATION_METHOD}${COLOR_RESET}"
        echo
        set -x
        if ! ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} --constraint "${HOME}/constraints.txt"; then
            set +x
            echo
            echo "${COLOR_YELLOW}Likely pyproject.toml has new dependencies conflicting with constraints.${COLOR_RESET}"
            echo
            echo "${COLOR_BLUE}Falling back to no-constraints installation.${COLOR_RESET}"
            echo
            set -x
            # Second attempt without the constraints file.
            ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_IF_NEEDED} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags}
            set +x
        fi
    fi
}
# Top-level installation routine used while building the image.
#
# Drops the "mysql," / "postgres," entries from AIRFLOW_EXTRAS when the matching client is not
# installed, dispatches on AIRFLOW_INSTALLATION_METHOD ("." installs from sources, anything else
# goes through install_from_external_spec), then reinstalls the packaging tools and runs
# 'pip check'.
function install_airflow_when_building_images() {
    # No mysql client -> the 'mysql' extra is removed from the list of extras
    [[ ${INSTALL_MYSQL_CLIENT} == "true" ]] || {
        AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,}
        echo "${COLOR_YELLOW}MYSQL client installation is disabled. Extra 'mysql' installations were therefore omitted.${COLOR_RESET}"
    }
    # No postgres client -> the 'postgres' extra is removed from the list of extras
    [[ ${INSTALL_POSTGRES_CLIENT} == "true" ]] || {
        AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,}
        echo "${COLOR_YELLOW}Postgres client installation is disabled. Extra 'postgres' installations were therefore omitted.${COLOR_RESET}"
    }
    # Pick the installer matching AIRFLOW_INSTALLATION_METHOD
    case "${AIRFLOW_INSTALLATION_METHOD}" in
        ".")
            install_from_sources
            ;;
        *)
            install_from_external_spec
            ;;
    esac
    # Make sure command tracing is off regardless of how the installer left it
    set +x
    common::install_packaging_tools
    echo
    echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}"
    echo
    pip check
}
# Script entry point: run the common:: setup helpers in this order, then perform the
# installation. The order is significant (the colour variables are used by later messages).
common::get_colors
common::get_packaging_tool
common::get_airflow_version_specification
common::get_constraints_location
common::show_packaging_tool_version_and_location
install_airflow_when_building_images
EOF
# The content below is automatically copied from scripts/docker/install_additional_dependencies.sh
COPY <<"EOF" /install_additional_dependencies.sh
#!/usr/bin/env bash
# Fail on errors, on unset variables and on failures anywhere in a pipeline.
set -euo pipefail
# Abort with the message "Should be set" when ADDITIONAL_PYTHON_DEPS is unset or empty.
: "${ADDITIONAL_PYTHON_DEPS:?Should be set}"
# Load the shared helpers from common.sh located in the same directory as this script.
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
# Installs the distributions listed in ADDITIONAL_PYTHON_DEPS with the selected packaging tool,
# reinstalls the packaging tools and finishes with 'pip check'.
#
# When UPGRADE_RANDOM_INDICATOR_STRING is non-empty the install uses
# UPGRADE_TO_HIGHEST_RESOLUTION, otherwise it uses UPGRADE_IF_NEEDED.
function install_additional_dependencies() {
    local banner
    local upgrade_flags
    if [[ "${UPGRADE_RANDOM_INDICATOR_STRING=}" == "" ]]; then
        banner="Installing additional dependencies upgrading only if needed"
        upgrade_flags="${UPGRADE_IF_NEEDED}"
    else
        banner="Installing additional dependencies while upgrading to newer dependencies"
        upgrade_flags="${UPGRADE_TO_HIGHEST_RESOLUTION}"
    fi
    echo
    echo "${COLOR_BLUE}${banner}${COLOR_RESET}"
    echo
    set -x
    # All flag variables are deliberately left unquoted so they split into separate arguments.
    ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${upgrade_flags} \
        ${ADDITIONAL_PIP_INSTALL_FLAGS} \
        ${ADDITIONAL_PYTHON_DEPS}
    set +x
    common::install_packaging_tools
    echo
    echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}"
    echo
    pip check
}
# Script entry point: run the common:: setup helpers in this order, then install the
# additional dependencies.
common::get_colors
common::get_packaging_tool
common::get_airflow_version_specification
common::get_constraints_location
common::show_packaging_tool_version_and_location
install_additional_dependencies
EOF
# The content below is automatically copied from scripts/docker/entrypoint_ci.sh
# The "main" stage later copies this file to /entrypoint and uses it as the image entrypoint.
COPY <<"EOF" /entrypoint_ci.sh
#!/usr/bin/env bash
# Enables bash command tracing when VERBOSE_COMMANDS is "true" and disables it otherwise.
# VERBOSE_COMMANDS is defaulted to "false" when it is unset or empty.
function set_verbose() {
    case "${VERBOSE_COMMANDS:="false"}" in
        "true")
            set -x
            ;;
        *)
            set +x
            ;;
    esac
}
# Apply the tracing preference before anything else runs.
set_verbose
# Shared in-container initialisation; falls back to /opt/airflow when AIRFLOW_SOURCES is unset.
# NOTE(review): IN_CONTAINER_DIR and the COLOR_* variables used below are presumably defined by
# this init script - confirm.
. "${AIRFLOW_SOURCES:-/opt/airflow}"/scripts/in_container/_in_container_script_init.sh
# Preload the system libstdc++ for the current machine architecture in every child process.
LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6"
export LD_PRELOAD
# World-writable /tmp with the sticky bit set.
chmod 1777 /tmp
# Resolve the sources root as two directories above IN_CONTAINER_DIR.
AIRFLOW_SOURCES=$(cd "${IN_CONTAINER_DIR}/../.." || exit 1; pwd)
# Defaults applied when the variables are unset or empty.
PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:=3.10}
export AIRFLOW_HOME=${AIRFLOW_HOME:=${HOME}}
# Best effort: a failure to create the sqlite directory is ignored.
mkdir "${AIRFLOW_HOME}/sqlite" -p || true
ASSET_COMPILATION_WAIT_MULTIPLIER=${ASSET_COMPILATION_WAIT_MULTIPLIER:=1}
# Fixed terminal width when running with CI=true.
if [[ "${CI=}" == "true" ]]; then
export COLUMNS="202"
fi
# NOTE(review): presumably provides check_service_connection / run_nc used further down - confirm.
. "${IN_CONTAINER_DIR}/check_connectivity.sh"
# Blocks while the UI asset compilation lock file exists, then fails if the compilation left
# an error output file behind.
#
# Reads: AIRFLOW_SOURCES, ASSET_COMPILATION_WAIT_MULTIPLIER (must be an integer).
# Timing (one poll per second):
#   * after 30 * ASSET_COMPILATION_WAIT_MULTIPLIER polls a warning with manual recovery
#     steps is printed once,
#   * after 60 * ASSET_COMPILATION_WAIT_MULTIPLIER polls the script exits with code 1.
# Exits with code 1 as well when "${AIRFLOW_SOURCES}/.build/ui/asset_compile.out" exists
# (its content is printed and the file is removed first).
function wait_for_asset_compilation() {
    if [[ -f "${AIRFLOW_SOURCES}/.build/ui/.asset_compile.lock" ]]; then
        echo
        echo "${COLOR_YELLOW}Waiting for asset compilation to complete in the background.${COLOR_RESET}"
        echo
        local counter=0
        while [[ -f "${AIRFLOW_SOURCES}/.build/ui/.asset_compile.lock" ]]; do
            if (( counter % 5 == 2 )); then
                echo "${COLOR_BLUE}Still waiting .....${COLOR_RESET}"
            fi
            sleep 1
            ((counter=counter+1))
            # Arithmetic comparison is required here: inside [[ ]] the right-hand side of "=="
            # is a glob pattern, so "30*1" matched "301" rather than the number 30.
            if (( counter == 30 * ASSET_COMPILATION_WAIT_MULTIPLIER )); then
                echo
                echo "${COLOR_YELLOW}The asset compilation is taking too long.${COLOR_RESET}"
                echo """
If it does not complete soon, you might want to stop it and remove file lock:
* press Ctrl-C
* run 'rm ${AIRFLOW_SOURCES}/.build/ui/.asset_compile.lock'
"""
            fi
            if (( counter >= 60 * ASSET_COMPILATION_WAIT_MULTIPLIER )); then
                echo
                echo "${COLOR_RED}The asset compilation is taking too long. Exiting.${COLOR_RESET}"
                echo "${COLOR_RED}refer to dev/breeze/doc/04_troubleshooting.rst for resolution steps.${COLOR_RESET}"
                echo
                exit 1
            fi
        done
    fi
    # The .out file is treated as the failure marker of the background compilation.
    if [ -f "${AIRFLOW_SOURCES}/.build/ui/asset_compile.out" ]; then
        echo
        echo "${COLOR_RED}The asset compilation failed. Exiting.${COLOR_RESET}"
        echo
        cat "${AIRFLOW_SOURCES}/.build/ui/asset_compile.out"
        rm "${AIRFLOW_SOURCES}/.build/ui/asset_compile.out"
        echo
        exit 1
    fi
}
# Prepares the container environment before a shell, tests or Airflow components are started.
#
# Does nothing when SKIP_ENVIRONMENT_INITIALIZATION is "true". Otherwise it prints the basic
# configuration, exports test-related settings, sources the environment configuration and init
# scripts, runs check_environment.sh (exiting with its code on failure), optionally sets up
# ssh access to localhost and the LocalStack variables, and - when START_AIRFLOW is
# "true"/"True" - replaces the shell with run_mprocs or run_tmux (so it does not return).
function environment_initialization() {
if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} == "true" ]]; then
return
fi
echo
echo "${COLOR_BLUE}Running Initialization. Your basic configuration is:${COLOR_RESET}"
echo
echo " * ${COLOR_BLUE}Airflow home:${COLOR_RESET} ${AIRFLOW_HOME}"
echo " * ${COLOR_BLUE}Airflow sources:${COLOR_RESET} ${AIRFLOW_SOURCES}"
echo " * ${COLOR_BLUE}Airflow core SQL connection:${COLOR_RESET} ${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN:=}"
if [[ ${BACKEND=} == "postgres" ]]; then
echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} Postgres: ${POSTGRES_VERSION}"
elif [[ ${BACKEND=} == "mysql" ]]; then
echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} MySQL: ${MYSQL_VERSION}"
elif [[ ${BACKEND=} == "sqlite" ]]; then
echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} Sqlite"
fi
echo
if [[ ${STANDALONE_DAG_PROCESSOR=} == "true" ]]; then
echo
echo "${COLOR_BLUE}Forcing scheduler/standalone_dag_processor to True${COLOR_RESET}"
echo
export AIRFLOW__SCHEDULER__STANDALONE_DAG_PROCESSOR=True
fi
# Default both flags to "false" when unset or empty.
RUN_TESTS=${RUN_TESTS:="false"}
CI=${CI:="false"}
# Added to have run-tests on path
export PATH=${PATH}:${AIRFLOW_SOURCES}:/usr/local/go/bin/
mkdir -pv "${AIRFLOW_HOME}/logs/"
# Change the default worker_concurrency for tests
export AIRFLOW__CELERY__WORKER_CONCURRENCY=8
# Errors are tolerated between "set +e" and "set -e"; only the exit code of
# check_environment.sh is inspected afterwards.
set +e
# shellcheck source=scripts/in_container/configure_environment.sh
. "${IN_CONTAINER_DIR}/configure_environment.sh"
# shellcheck source=scripts/in_container/run_init_script.sh
. "${IN_CONTAINER_DIR}/run_init_script.sh"
"${IN_CONTAINER_DIR}/check_environment.sh"
ENVIRONMENT_EXIT_CODE=$?
set -e
if [[ ${ENVIRONMENT_EXIT_CODE} != 0 ]]; then
echo
echo "Error: check_environment returned ${ENVIRONMENT_EXIT_CODE}. Exiting."
echo
exit ${ENVIRONMENT_EXIT_CODE}
fi
# Make /usr/lib/google-cloud-sdk/bin/gcloud a symlink to /usr/bin/gcloud ("ln -f" replaces
# the placeholder file created by touch).
mkdir -p /usr/lib/google-cloud-sdk/bin
touch /usr/lib/google-cloud-sdk/bin/gcloud
ln -s -f /usr/bin/gcloud /usr/lib/google-cloud-sdk/bin/gcloud
if [[ ${SKIP_SSH_SETUP="false"} == "false" ]]; then
# Set up ssh keys
# "yes" is piped in to answer ssh-keygen's prompt; all output goes to the log file.
echo 'yes' | ssh-keygen -t rsa -C your_email@youremail.com -m PEM -P '' -f ~/.ssh/id_rsa \
>"${AIRFLOW_HOME}/logs/ssh-keygen.log" 2>&1
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ln -s -f ~/.ssh/authorized_keys ~/.ssh/authorized_keys2
chmod 600 ~/.ssh/*
# SSH Service
sudo service ssh restart >/dev/null 2>&1
# Sometimes the server is not quick enough to load the keys!
# The loop has no upper bound: it polls until ssh-keyscan reports exactly 3 lines.
while [[ $(ssh-keyscan -H localhost 2>/dev/null | wc -l) != "3" ]] ; do
echo "Not all keys yet loaded by the server"
sleep 0.05
done
ssh-keyscan -H localhost >> ~/.ssh/known_hosts 2>/dev/null
fi
if [[ ${INTEGRATION_LOCALSTACK:-"false"} == "true" ]]; then
echo
echo "${COLOR_BLUE}Configuring LocalStack integration${COLOR_RESET}"
echo
# Define LocalStack AWS configuration
declare -A localstack_config=(
["AWS_ENDPOINT_URL"]="http://localstack:4566"
["AWS_ACCESS_KEY_ID"]="test"
["AWS_SECRET_ACCESS_KEY"]="test"
["AWS_DEFAULT_REGION"]="us-east-1"
)
# Export each configuration variable and log it
for key in "${!localstack_config[@]}"; do
export "$key"="${localstack_config[$key]}"
echo " * ${COLOR_BLUE}${key}:${COLOR_RESET} ${localstack_config[$key]}"
done
echo
fi
cd "${AIRFLOW_SOURCES}"
# Temporarily add /opt/airflow/providers/standard/tests to PYTHONPATH in order to see example dags
# in the UI when testing in Breeze. This might be solved differently in the future
if [[ -d /opt/airflow/providers/standard/tests ]]; then
export PYTHONPATH=${PYTHONPATH=}:/opt/airflow/providers/standard/tests
fi
if [[ ${START_AIRFLOW:="false"} == "true" || ${START_AIRFLOW} == "True" ]]; then
# Default the celery pool to "solo" when the celery worker is being debugged.
if [[ ${BREEZE_DEBUG_CELERY_WORKER=} == "true" ]]; then
export AIRFLOW__CELERY__POOL=${AIRFLOW__CELERY__POOL:-solo}
fi
# NOTE(review): LOAD_EXAMPLES has no default here - presumably always set by the caller.
export AIRFLOW__CORE__LOAD_EXAMPLES=${LOAD_EXAMPLES}
wait_for_asset_compilation
# exec replaces the current shell, so nothing after this point runs in this branch.
if [[ ${USE_MPROCS:="false"} == "true" || ${USE_MPROCS} == "True" ]]; then
# shellcheck source=scripts/in_container/bin/run_mprocs
exec run_mprocs
else
# shellcheck source=scripts/in_container/bin/run_tmux
exec run_tmux
fi
fi
}
# When MOUNT_SOURCES is "remove", deletes the apache_airflow*.dist-info directories from the
# /usr/local site-packages of the selected Python version. Does nothing otherwise.
function handle_mount_sources() {
    [[ ${MOUNT_SOURCES=} == "remove" ]] || return 0
    local site_packages="/usr/local/lib/python${PYTHON_MAJOR_MINOR_VERSION}/site-packages"
    echo
    echo "${COLOR_BLUE}Mounted sources are removed, cleaning up mounted dist-info files${COLOR_RESET}"
    echo
    rm -rf "${site_packages}"/apache_airflow*.dist-info/
}
# Selects which Airflow installation the container uses.
#
# With no USE_AIRFLOW_VERSION and USE_DISTRIBUTIONS_FROM_DIST not "true", the mounted sources
# are used (PYTHONPATH is pointed at AIRFLOW_SOURCES and its logs/ and tmp/ are emptied).
# Otherwise development dependencies are reinstalled and install_airflow_and_providers.py is
# run; with CLEAN_AIRFLOW_INSTALLATION="true" the installed packages are uninstalled first.
# Finally, settings that do not apply to the selected 2.x version are adjusted.
function determine_airflow_to_use() {
USE_AIRFLOW_VERSION="${USE_AIRFLOW_VERSION:=""}"
if [[ "${USE_AIRFLOW_VERSION}" == "" && "${USE_DISTRIBUTIONS_FROM_DIST=}" != "true" ]]; then
export PYTHONPATH=${AIRFLOW_SOURCES}
echo
echo "${COLOR_BLUE}Using airflow version from current sources${COLOR_RESET}"
echo
# Cleanup the logs, tmp when entering the environment
sudo rm -rf "${AIRFLOW_SOURCES}"/logs/*
sudo rm -rf "${AIRFLOW_SOURCES}"/tmp/*
mkdir -p "${AIRFLOW_SOURCES}"/logs/
mkdir -p "${AIRFLOW_SOURCES}"/tmp/
else
if [[ ${CLEAN_AIRFLOW_INSTALLATION=} == "true" ]]; then
echo
echo "${COLOR_BLUE}Uninstalling all packages first${COLOR_RESET}"
echo
# The freeze output is filtered to drop editable installs ("-e"), comment lines, entries
# starting with "uv" and entries containing "@" before it is passed to uninstall.
# shellcheck disable=SC2086
${PACKAGING_TOOL_CMD} freeze | grep -ve "^-e" | grep -ve "^#" | grep -ve "^uv" | grep -v "@" | \
xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS}
# Now install rich and click first to use the installation script
# shellcheck disable=SC2086
${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} rich rich-click click \
--constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt
fi
echo
echo "${COLOR_BLUE}Reinstalling all development dependencies${COLOR_RESET}"
echo
# Use uv run to install necessary dependencies automatically.
# In the future we will be able to use uv sync when `uv.lock` is supported.
# For the use in parallel runs in docker containers --no-cache is needed - otherwise there is
# a possibility of overriding temporary environments by multiple parallel processes
uv run --no-cache /opt/airflow/scripts/in_container/install_development_dependencies.py \
--constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-"${PYTHON_MAJOR_MINOR_VERSION}".txt
# Some packages might leave legacy typing module which causes test issues
# shellcheck disable=SC2086
${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} typing || true
echo
echo "${COLOR_BLUE}Installing airflow and providers ${COLOR_RESET}"
echo
python "${IN_CONTAINER_DIR}/install_airflow_and_providers.py"
fi
# The regex matches every value that starts with "2".
if [[ "${USE_AIRFLOW_VERSION}" =~ ^2.* ]]; then
# Remove auth manager setting
unset AIRFLOW__CORE__AUTH_MANAGER
fi
# Applies to 2.0.x, 2.1.x and 2.2.x only, and only when a database connection is configured.
if [[ "${USE_AIRFLOW_VERSION}" =~ ^2\.2\..*|^2\.1\..*|^2\.0\..* && "${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=}" != "" ]]; then
# make sure old variable is used for older airflow versions
export AIRFLOW__CORE__SQL_ALCHEMY_CONN="${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN}"
fi
}
# When UPGRADE_BOTO is "true": removes aiobotocore and s3fs (best effort) and upgrades boto3 and
# botocore within the "<1.38.3" bound. Does nothing otherwise.
function check_boto_upgrade() {
    if [[ ${UPGRADE_BOTO=} == "true" ]]; then
        echo
        echo "${COLOR_BLUE}Upgrading boto3, botocore to latest version to run Amazon tests with them${COLOR_RESET}"
        echo
        # shellcheck disable=SC2086
        ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} aiobotocore s3fs || true
        # shellcheck disable=SC2086
        ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade "boto3<1.38.3" "botocore<1.38.3"
    fi
}
# Re-syncs all workspace packages (except the FAB provider) with the highest resolution when
# UPGRADE_SQLALCHEMY is "true" and Python 3.13 is in use. Does nothing otherwise.
function check_upgrade_sqlalchemy() {
    # The python version constraint is a TEMPORARY WORKAROUND to exclude all FAB tests. It should be
    # removed once we upgrade FAB to v5 (PR #50960).
    if [[ "${UPGRADE_SQLALCHEMY=}" == "true" && ${PYTHON_MAJOR_MINOR_VERSION} == "3.13" ]]; then
        echo
        echo "${COLOR_BLUE}Upgrading sqlalchemy to the latest version to run tests with it${COLOR_RESET}"
        echo
        uv sync --all-packages --no-install-package apache-airflow-providers-fab --resolution highest \
            --no-python-downloads --no-managed-python
    fi
}
# When DOWNGRADE_SQLALCHEMY is "true": installs sqlalchemy[asyncio] pinned to the lower bound
# found on the "sqlalchemy[asyncio]>=" line of airflow-core/pyproject.toml (path relative to
# the current directory) and runs 'pip check'. Does nothing otherwise.
function check_downgrade_sqlalchemy() {
    [[ ${DOWNGRADE_SQLALCHEMY=} == "true" ]] || return 0
    local minimum_version
    # Declared and assigned separately so that a failing pipeline is not masked by "local".
    minimum_version=$(grep "sqlalchemy\[asyncio\]>=" airflow-core/pyproject.toml | sed "s/.*>=\([0-9\.]*\).*/\1/" | xargs)
    echo
    echo "${COLOR_BLUE}Downgrading sqlalchemy to minimum supported version: ${minimum_version}${COLOR_RESET}"
    echo
    # shellcheck disable=SC2086
    ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "sqlalchemy[asyncio]==${minimum_version}"
    pip check
}
# When DOWNGRADE_PENDULUM is "true" and Python is not 3.12: installs pendulum pinned to the
# lower bound found on the first "pendulum>=" line of airflow-core/pyproject.toml (path
# relative to the current directory) and runs 'pip check'. Does nothing otherwise.
function check_downgrade_pendulum() {
    [[ ${DOWNGRADE_PENDULUM=} == "true" && ${PYTHON_MAJOR_MINOR_VERSION} != "3.12" ]] || return 0
    local minimum_version
    # Declared and assigned separately so that a failing pipeline is not masked by "local".
    minimum_version=$(grep "pendulum>=" airflow-core/pyproject.toml | head -1 | sed "s/.*>=\([0-9\.]*\).*/\1/" | xargs)
    echo
    echo "${COLOR_BLUE}Downgrading pendulum to minimum supported version: ${minimum_version}${COLOR_RESET}"
    echo
    # shellcheck disable=SC2086
    ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "pendulum==${minimum_version}"
    pip check
}
# When RUN_TESTS is "true", replaces the current shell with the test runner script, passing
# all arguments through: run_system_tests.sh for TEST_GROUP "system", run_ci_tests.sh for
# anything else. Returns without doing anything when RUN_TESTS is not "true".
function check_run_tests() {
    [[ ${RUN_TESTS=} == "true" ]] || return 0
    local runner_script="run_ci_tests.sh"
    if [[ ${TEST_GROUP:=""} == "system" ]]; then
        runner_script="run_system_tests.sh"
    fi
    exec "${IN_CONTAINER_DIR}/${runner_script}" "${@}"
}
# When FORCE_LOWEST_DEPENDENCIES is "true", re-syncs dependencies with the lowest direct
# resolution - for a single provider when TEST_TYPE has the form "Providers[<provider_id>]",
# for airflow-core otherwise. Does nothing when the flag is not "true".
#
# For a provider, the script exits with code 0 (skipping everything that follows) when
# is_provider_excluded.py returns a non-zero status for that provider.
# Exits with code 1 when the target directory cannot be entered.
function check_force_lowest_dependencies() {
    if [[ ${FORCE_LOWEST_DEPENDENCIES=} != "true" ]]; then
        return
    fi
    local sync_directory
    if [[ ${TEST_TYPE=} =~ Providers\[.*\] ]]; then
        local provider_id
        # shellcheck disable=SC2001
        provider_id=$(echo "${TEST_TYPE}" | sed 's/Providers\[\(.*\)\]/\1/')
        echo
        echo "${COLOR_BLUE}Forcing dependencies to lowest versions for provider: ${provider_id}${COLOR_RESET}"
        echo
        if ! /opt/airflow/scripts/in_container/is_provider_excluded.py "${provider_id}"; then
            echo
            echo "${COLOR_YELLOW}Skipping ${provider_id} provider check on Python ${PYTHON_MAJOR_MINOR_VERSION}!${COLOR_RESET}"
            echo
            exit 0
        fi
        # Every dot of the provider id is a directory separator (e.g. "apache.hive" -> "apache/hive").
        sync_directory="${AIRFLOW_SOURCES}/providers/${provider_id//.//}"
    else
        echo
        echo "${COLOR_BLUE}Forcing dependencies to lowest versions for Airflow.${COLOR_RESET}"
        echo
        sync_directory="${AIRFLOW_SOURCES}/airflow-core"
    fi
    cd "${sync_directory}" || exit 1
    # --no-binary is needed in order to avoid libxml and xmlsec using different version of libxml2
    # (binary lxml embeds its own libxml2, while xmlsec uses system one).
    # See https://bugs.launchpad.net/lxml/+bug/2110068
    uv sync --resolution lowest-direct --no-binary-package lxml --no-binary-package xmlsec --all-extras \
        --no-python-downloads --no-managed-python
}
# Runs install_airflow_python_client.py from IN_CONTAINER_DIR when
# INSTALL_AIRFLOW_PYTHON_CLIENT is "true". Does nothing otherwise.
function check_airflow_python_client_installation() {
    if [[ ${INSTALL_AIRFLOW_PYTHON_CLIENT=} == "true" ]]; then
        python "${IN_CONTAINER_DIR}/install_airflow_python_client.py"
    fi
}
# Runs 'airflow db migrate' when the api server is going to be started
# (START_API_SERVER_WITH_EXAMPLES is "true") or when TEST_GROUP is "system" - system tests may
# or may not start the api server, depending on the Airflow version used to run them.
# Does nothing otherwise.
function initialize_db() {
    [[ ${START_API_SERVER_WITH_EXAMPLES=} == "true" || ${TEST_GROUP:=""} == "system" ]] || return 0
    echo
    echo "${COLOR_BLUE}Initializing database${COLOR_RESET}"
    echo
    airflow db migrate
    echo
    echo "${COLOR_BLUE}Database initialized${COLOR_RESET}"
}
# Starts the Airflow api-server (port 8080, daemonized) with example dags loaded and waits
# until it accepts connections; exits with the connection check's code when it does not.
# An admin user is created first when the configured auth manager is the FabAuthManager.
function start_api_server_with_examples(){
USE_AIRFLOW_VERSION="${USE_AIRFLOW_VERSION:=""}"
# Start the api server only when START_API_SERVER_WITH_EXAMPLES is "true" or the TEST_GROUP
# env var is equal to "system"; return when neither of the two holds.
if [[ ${START_API_SERVER_WITH_EXAMPLES=} != "true" && ${TEST_GROUP:=""} != "system" ]]; then
return
fi
# If the use Airflow version is set and it sorts before "3.0.0" (such versions do not have the
# API server anyway) also return.
# NOTE(review): "<" inside [[ ]] is a lexicographic string comparison, not a version comparison.
if [[ ${USE_AIRFLOW_VERSION} != "" && ${USE_AIRFLOW_VERSION} < "3.0.0" ]]; then
return
fi
export AIRFLOW__CORE__LOAD_EXAMPLES=True
export AIRFLOW__API__EXPOSE_CONFIG=True
airflow dags reserialize
echo "Example dags parsing finished"
if airflow config get-value core auth_manager | grep -q "FabAuthManager"; then
echo "Create admin user"
# Best effort: a failure to create the user is ignored.
airflow users create -u admin -p admin -f Thor -l Administrator -r Admin -e admin@email.domain || true
echo "Admin user created"
else
echo "Skipping user creation as auth manager different from Fab is used"
fi
echo
echo "${COLOR_BLUE}Starting airflow api server${COLOR_RESET}"
echo
airflow api-server --port 8080 --daemon
echo
echo "${COLOR_BLUE}Waiting for api-server to start${COLOR_RESET}"
echo
# NOTE(review): check_service_connection and run_nc are not defined in this script; presumably
# they come from the sourced check_connectivity.sh, and 100 looks like a retry limit - confirm.
check_service_connection "Airflow api-server" "run_nc localhost 8080" 100
EXIT_CODE=$?
if [[ ${EXIT_CODE} != 0 ]]; then
echo
echo "${COLOR_RED}Api server did not start properly${COLOR_RESET}"
echo
exit ${EXIT_CODE}
fi
echo
echo "${COLOR_BLUE}Airflow api-server started${COLOR_RESET}"
}
# Main entrypoint flow. The steps run in this order; environment_initialization and
# check_run_tests may replace this shell via exec, in which case the later steps never run.
handle_mount_sources
determine_airflow_to_use
environment_initialization
check_boto_upgrade
check_upgrade_sqlalchemy
check_downgrade_sqlalchemy
check_downgrade_pendulum
check_force_lowest_dependencies
check_airflow_python_client_installation
initialize_db
start_api_server_with_examples
check_run_tests "${@}"
# Reached only when no test runner was exec'ed: hand over to bash with the entrypoint arguments.
exec /bin/bash "${@}"
EOF
# The content below is automatically copied from scripts/docker/entrypoint_exec.sh
# The "main" stage later copies this file to /entrypoint-exec.
COPY <<"EOF" /entrypoint_exec.sh
#!/usr/bin/env bash
# Source the in-container init, environment configuration and init scripts into this shell,
# then replace it with bash, passing all arguments through.
. /opt/airflow/scripts/in_container/_in_container_script_init.sh
. /opt/airflow/scripts/in_container/configure_environment.sh
. /opt/airflow/scripts/in_container/run_init_script.sh
exec /bin/bash "${@}"
EOF
# The "main" stage builds the CI image itself; scripts are pulled in from the "scripts" stage.
# The AS keyword is upper-cased to match FROM (BuildKit "FromAsCasing" build check).
FROM ${BASE_IMAGE} AS main
# Nolog bash flag is currently ignored - but you can replace it with other flags (for example
# xtrace - to show commands executed)
# Every RUN below is executed by bash with pipefail, errexit and nounset enabled.
SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"]
# BASE_IMAGE is declared before the first FROM, so it has to be re-declared to be visible here.
ARG BASE_IMAGE
ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow"
# By increasing this number we can do force build of all dependencies.
# NOTE! When you want to make sure dependencies are installed from scratch in your PR after removing
# some dependencies, you also need to set "disable image cache" in your PR to make sure the image is
# not built using the "main" version of those dependencies.
ARG DEPENDENCIES_EPOCH_NUMBER="15"
# Make sure noninteractive debian install is used and language variables set
ENV BASE_IMAGE=${BASE_IMAGE} \
    DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \
    LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \
    DEPENDENCIES_EPOCH_NUMBER=${DEPENDENCIES_EPOCH_NUMBER} \
    INSTALL_MYSQL_CLIENT="true" \
    INSTALL_MSSQL_CLIENT="true" \
    INSTALL_POSTGRES_CLIENT="true" \
    PIP_CACHE_DIR=/root/.cache/pip \
    UV_CACHE_DIR=/root/.cache/uv
RUN echo "Base image version: ${BASE_IMAGE}"
# Build arguments for the OS dependency installation. Build arguments are visible as
# environment variables inside the RUN below.
# NOTE(review): presumably all of them are consumed by install_os_dependencies.sh - confirm.
ARG DEV_APT_COMMAND=""
ARG ADDITIONAL_DEV_APT_COMMAND=""
ARG ADDITIONAL_DEV_ENV_VARS=""
# Extra apt packages for the CI/development image (space separated).
ARG ADDITIONAL_DEV_APT_DEPS="bash-completion dumb-init git gdb graphviz krb5-user \
less libenchant-2-2 libgcc-11-dev libgeos-dev libpq-dev net-tools netcat-openbsd \
openssh-server postgresql-client software-properties-common rsync tmux unzip vim xxd"
ARG ADDITIONAL_DEV_APT_ENV=""
ENV DEV_APT_COMMAND=${DEV_APT_COMMAND} \
ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS} \
ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND}
ARG AIRFLOW_PYTHON_VERSION="3.12.12"
ENV AIRFLOW_PYTHON_VERSION=${AIRFLOW_PYTHON_VERSION}
ENV GOLANG_MAJOR_MINOR_VERSION="1.25.5"
# Only this one script is copied here, so changes to the other scripts do not invalidate this layer.
COPY --from=scripts install_os_dependencies.sh /scripts/docker/
# The script is run with the single argument "ci".
RUN bash /scripts/docker/install_os_dependencies.sh ci
# In case system python is installed, setting LD_LIBRARY_PATH prevents any case the system python
# libraries will be accidentally used before the library installed from sources (which is newer and
# python interpreter might break if accidentally the old system libraries are used).
ENV LD_LIBRARY_PATH="/usr/python/lib"
COPY --from=scripts common.sh /scripts/docker/
# Only copy mysql/mssql/postgres installation scripts for now - so that changing the other
# scripts which are needed much later will not invalidate the docker layer here.
COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scripts/docker/
# Locations and image type, overridable at build time and persisted in the image environment.
ARG HOME=/root
ARG AIRFLOW_IMAGE_TYPE="ci"
ARG AIRFLOW_HOME=/root/airflow
ARG AIRFLOW_SOURCES=/opt/airflow
ARG INSTALL_MYSQL_CLIENT_TYPE="mysql"
ENV HOME=${HOME} \
AIRFLOW_IMAGE_TYPE=${AIRFLOW_IMAGE_TYPE} \
AIRFLOW_HOME=${AIRFLOW_HOME} \
AIRFLOW_SOURCES=${AIRFLOW_SOURCES} \
INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE}
# We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an
# unexpected result - the cache for Dockerfiles might get invalidated in case the host system
# had different umask set and group x bit was not set. In Azure the bit might be not set at all.
# That also protects against AUFS Docker backend problem where changing the executable bit required sync
# SECURITY NOTE: the "airflow" user below gets the fixed password "airflow" and passwordless sudo.
# This is acceptable only because this image is not intended for production use.
RUN bash /scripts/docker/install_mysql.sh prod \
&& bash /scripts/docker/install_mysql.sh dev \
&& bash /scripts/docker/install_mssql.sh dev \
&& bash /scripts/docker/install_postgres.sh dev \
# The user is added to allow ssh debugging (you can connect with airflow/airflow over ssh)
&& adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \
--quiet "airflow" --home "/home/airflow" \
&& echo -e "airflow\nairflow" | passwd airflow 2>&1 \
&& echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
&& chmod 0440 /etc/sudoers.d/airflow
# Install Helm
# The release tarball is streamed straight into tar, which writes only the helm binary to
# /usr/local/bin/helm. Any machine type other than aarch64 is treated as amd64.
ARG HELM_VERSION="v3.16.4"
# --fail makes curl exit with an error on HTTP 4xx/5xx instead of piping the error page into tar,
# and --show-error keeps the reason visible despite --silent.
RUN SYSTEM=$(uname -s | tr '[:upper:]' '[:lower:]') \
    && PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" ) \
    && HELM_URL="https://get.helm.sh/helm-${HELM_VERSION}-${SYSTEM}-${PLATFORM}.tar.gz" \
    && curl --fail --silent --show-error --location "${HELM_URL}" | tar -xz -O "${SYSTEM}-${PLATFORM}/helm" > /usr/local/bin/helm \
    && chmod +x /usr/local/bin/helm
# Install mprocs - a modern process manager for managing multiple Airflow components
# The release asset name uses the raw machine name reported by "uname -m".
ARG MPROCS_VERSION="0.7.3"
# --fail makes curl exit with an error on HTTP 4xx/5xx instead of piping the error page into tar,
# and --show-error keeps the reason visible despite --silent.
RUN SYSTEM=$(uname -s | tr '[:upper:]' '[:lower:]') \
    && PLATFORM="$(uname -m)" \
    && MPROCS_URL="https://github.com/pvolok/mprocs/releases/download/v${MPROCS_VERSION}/mprocs-${MPROCS_VERSION}-${SYSTEM}-${PLATFORM}-musl.tar.gz" \
    && echo "Downloading mprocs from ${MPROCS_URL}" \
    && curl --fail --silent --show-error --location "${MPROCS_URL}" | tar -xz -C /usr/local/bin/ mprocs \
    && chmod +x /usr/local/bin/mprocs
# Work from the Airflow sources directory and create the Airflow home with its dags/ and logs/
# sub-directories (a single mkdir call; -p also creates the parents, -v lists what was created).
WORKDIR ${AIRFLOW_SOURCES}
RUN mkdir -pv ${AIRFLOW_HOME} ${AIRFLOW_HOME}/dags ${AIRFLOW_HOME}/logs
ARG AIRFLOW_REPO=apache/airflow
ARG AIRFLOW_BRANCH=main
# Airflow Extras installed
ARG AIRFLOW_EXTRAS="all"
ARG ADDITIONAL_AIRFLOW_EXTRAS=""
# Allows to override constraints source
ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow"
ARG AIRFLOW_CONSTRAINTS_MODE="constraints-source-providers"
ARG AIRFLOW_CONSTRAINTS_REFERENCE=""
ARG AIRFLOW_CONSTRAINTS_LOCATION=""
ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main"
# By changing the epoch we can force reinstalling Airflow and pip all dependencies
# It can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable.
ARG AIRFLOW_CI_BUILD_EPOCH="10"
# Setup PIP
ARG UV_HTTP_TIMEOUT="300"
# By default PIP has progress bar but you can disable it.
ARG PIP_PROGRESS_BAR="on"
# Optimizing installation of Cassandra driver (in case there are no prebuilt wheels which is the
# case as of 20.04.2021 with Python 3.9).
# Speeds up building the image - cassandra driver without CYTHON saves around 10 minutes
ARG CASS_DRIVER_NO_CYTHON="1"
# Build cassandra driver on multiple CPUs
ARG CASS_DRIVER_BUILD_CONCURRENCY="8"
# Airflow version recorded in the image environment and labels (empty unless passed at build time)
ARG AIRFLOW_VERSION=""
# Additional PIP flags passed to all pip install commands except reinstalling pip itself
ARG ADDITIONAL_PIP_INSTALL_FLAGS=""
ARG AIRFLOW_USE_UV="true"
# AIRFLOW_PIP_VERSION and AIRFLOW_UV_VERSION are intentionally NOT set in this ENV: their ARGs
# are declared further down, so referencing them here expands to an empty string (BuildKit
# "UndefinedVar" check). They are exported by the ENV that follows their ARG declarations.
# AIRFLOW_EXTRAS gets ",<additional extras>" appended only when ADDITIONAL_AIRFLOW_EXTRAS is set.
ENV AIRFLOW_REPO=${AIRFLOW_REPO} \
    AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \
    AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \
    CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \
    AIRFLOW_CONSTRAINTS_MODE=${AIRFLOW_CONSTRAINTS_MODE} \
    AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \
    AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \
    DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \
    AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \
    AIRFLOW_VERSION=${AIRFLOW_VERSION} \
    AIRFLOW_USE_UV=${AIRFLOW_USE_UV} \
    UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT} \
    UV_SYSTEM_PYTHON="true" \
    UV_PROJECT_ENVIRONMENT="/usr/local" \
    INSTALL_MYSQL_CLIENT="true" \
    INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \
    INSTALL_MSSQL_CLIENT="true" \
    INSTALL_POSTGRES_CLIENT="true" \
    AIRFLOW_INSTALLATION_METHOD="." \
    AIRFLOW_VERSION_SPECIFICATION="" \
    PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \
    ADDITIONAL_PIP_INSTALL_FLAGS=${ADDITIONAL_PIP_INSTALL_FLAGS} \
    CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY} \
    CASS_DRIVER_NO_CYTHON=${CASS_DRIVER_NO_CYTHON}
RUN echo "Airflow version: ${AIRFLOW_VERSION}"
# Copy all scripts required for installation - changing any of those should lead to
# rebuilding from here
COPY --from=scripts common.sh install_packaging_tools.sh install_additional_dependencies.sh /scripts/docker/
# We are first creating a venv where all python packages and .so binaries needed by those are
# installed.
# NOTE(review): no venv creation is visible in this part of the file (UV_SYSTEM_PYTHON is "true"
# and UV_PROJECT_ENVIRONMENT is /usr/local) - the sentence above may be outdated; confirm.
# Here we fix the versions so all subsequent commands will use the versions
# from the sources
# You can swap comments between those two args to test pip from the main version
# When you attempt to test if the version of `pip` from specified branch works for our builds
# Also use `force pip` label on your PR to swap all places we use `uv` to `pip`
ARG AIRFLOW_PIP_VERSION=25.3
# ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main"
ARG AIRFLOW_UV_VERSION=0.9.14
ARG AIRFLOW_PREK_VERSION="0.2.19"
# UV_LINK_MODE=copy is needed since we are using cache mounted from the host
ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \
UV_LINK_MODE=copy \
AIRFLOW_PREK_VERSION=${AIRFLOW_PREK_VERSION}
# The PATH is needed for python to find installed and cargo to build the wheels
ENV PATH="/usr/python/bin:/root/.local/bin:/root/.cargo/bin:${PATH}"
# Useful for creating a cache id based on the underlying architecture, preventing the use of cached python packages from
# an incorrect architecture.
ARG TARGETARCH
# Value to be able to easily change cache id and therefore use a bare new cache
ARG DEPENDENCY_CACHE_EPOCH="1"
# Install useful command line tools in their own virtualenv so that they do not clash with
# dependencies installed in Airflow also reinstall PIP and UV to make sure they are installed
# in the version specified above
RUN bash /scripts/docker/install_packaging_tools.sh
COPY --from=scripts install_airflow_when_building_images.sh /scripts/docker/
# We can copy everything here. The Context is filtered by dockerignore. This makes sure we are not
# copying over stuff that is accidentally generated or that we do not need (such as egg-info)
# if you want to add something that is missing and you expect to see it in the image you can
# add it with ! in .dockerignore next to the airflow, test etc. directories there
COPY . ${AIRFLOW_SOURCES}/
# A non-empty UPGRADE_RANDOM_INDICATOR_STRING switches the installation scripts to the
# "upgrade to highest resolution" mode (the scripts test it for being non-empty).
ARG UPGRADE_RANDOM_INDICATOR_STRING=""
ARG VERSION_SUFFIX=""
ENV UPGRADE_RANDOM_INDICATOR_STRING=${UPGRADE_RANDOM_INDICATOR_STRING} \
VERSION_SUFFIX=${VERSION_SUFFIX}
# The goal of this line is to install the dependencies from the most current pyproject.toml from sources
# This will be usually incremental small set of packages in CI optimized build, so it will be very fast
# In non-CI optimized build this will install all dependencies before installing sources.
# Usually we will install versions based on the dependencies in pyproject.toml and upgraded only if needed.
# But in cron job we will install latest versions matching pyproject.toml to see if there is no breaking change
# and push the constraints if everything is successful
# The cache mount id combines the target architecture and DEPENDENCY_CACHE_EPOCH, so caches are
# not shared across architectures and can be reset by bumping the epoch.
RUN --mount=type=cache,id=ci-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/root/.cache/ bash /scripts/docker/install_airflow_when_building_images.sh
# These two scripts were already copied to /scripts/docker/ earlier in this stage; the copy is
# repeated here before their second use.
COPY --from=scripts install_packaging_tools.sh install_additional_dependencies.sh /scripts/docker/
ARG ADDITIONAL_PYTHON_DEPS=""
ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS}
# Reinstall the packaging tools, then install ADDITIONAL_PYTHON_DEPS only when it is non-empty.
# The commands are separated with ";" - the errexit option set by SHELL still stops the build
# when the first script fails.
RUN --mount=type=cache,id=ci-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/root/.cache/ \
bash /scripts/docker/install_packaging_tools.sh; \
if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
bash /scripts/docker/install_additional_dependencies.sh; \
fi
# Install both entrypoints as executables (rwxr-xr-x). The mode is set while copying, so no
# separate "RUN chmod" layer duplicating the files is needed.
COPY --from=scripts --chmod=0755 entrypoint_ci.sh /entrypoint
COPY --from=scripts --chmod=0755 entrypoint_exec.sh /entrypoint-exec
# Install autocomplete for airflow (only when the airflow command is available), source the
# system bash completion and define the Breeze prompt showing the current Python version.
# Everything is appended to root's ~/.bashrc.
# hadolint ignore=SC2028
RUN if command -v airflow; then \
register-python-argcomplete airflow >> ~/.bashrc ; \
fi; \
echo "source /etc/bash_completion" >> ~/.bashrc ; \
echo 'export PS1="\[\033[1;36m\][Breeze:\$(python --version 2>&1 | cut -d\" \" -f2)]\[\033[0m\] \[\033[1;32m\]\u@\h\[\033[0m\]:\[\033[1;34m\]\w\[\033[0m\]\$ "' >> ~/.bashrc
WORKDIR ${AIRFLOW_SOURCES}
# Build metadata supplied by the build system; they have no defaults, so they expand to empty
# strings when not passed. Declared late so that changing them invalidates as few layers as
# possible.
ARG BUILD_ID
ARG COMMIT_SHA
ARG AIRFLOW_IMAGE_DATE_CREATED
ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${PATH}" \
GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm/" \
BUILD_ID=${BUILD_ID} \
COMMIT_SHA=${COMMIT_SHA} \
# When we enter the image, the /root/.cache is not mounted from temporary mount cache.
# We do not want to share the cache from host to avoid all kinds of problems where cache
# is different with different platforms / python versions. We want to have a clean cache
# in the image - and in this case /root/.cache is on the same filesystem as the installed packages.
# so we can go back to the default link mode being hardlink.
UV_LINK_MODE=hardlink \
MYPY_FORCE_COLOR="true"
# Link dumb-init for backwards compatibility (so that older images also work)
RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init
# Port used by the api-server started from the entrypoint (documentation only, nothing is published).
EXPOSE 8080
# Image metadata. Each label key is set exactly once: org.opencontainers.image.source comes from
# the AIRFLOW_IMAGE_REPOSITORY build argument (default "https://github.com/apache/airflow").
# A second, hard-coded definition of that key used to follow and silently override it.
LABEL org.apache.airflow.distro="debian" \
    org.apache.airflow.module="airflow" \
    org.apache.airflow.component="airflow" \
    org.apache.airflow.image="airflow-ci" \
    org.apache.airflow.version="${AIRFLOW_VERSION}" \
    org.apache.airflow.python.version="${AIRFLOW_PYTHON_VERSION}" \
    org.apache.airflow.uid="0" \
    org.apache.airflow.gid="0" \
    org.apache.airflow.build-id="${BUILD_ID}" \
    org.apache.airflow.commit-sha="${COMMIT_SHA}" \
    org.opencontainers.image.source="${AIRFLOW_IMAGE_REPOSITORY}" \
    org.opencontainers.image.created="${AIRFLOW_IMAGE_DATE_CREATED}" \
    org.opencontainers.image.authors="dev@airflow.apache.org" \
    org.opencontainers.image.url="https://airflow.apache.org" \
    org.opencontainers.image.documentation="https://airflow.apache.org/docs/docker-stack/index.html" \
    org.opencontainers.image.version="${AIRFLOW_VERSION}" \
    org.opencontainers.image.revision="${COMMIT_SHA}" \
    org.opencontainers.image.vendor="Apache Software Foundation" \
    org.opencontainers.image.licenses="Apache-2.0" \
    org.opencontainers.image.ref.name="airflow-ci-image" \
    org.opencontainers.image.title="Continuous Integration Airflow Image" \
    org.opencontainers.image.description="Installed Apache Airflow with Continuous Integration dependencies"
# dumb-init runs as PID 1 and launches /entrypoint (the CI entrypoint script copied above).
# The default command is empty, so arguments given to "docker run" are passed to /entrypoint.
ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
CMD []