# syntax=docker/dockerfile:1.4
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT.
#
ARG PYTHON_BASE_IMAGE="python:3.7-slim-bullseye"
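# The base image can be overridden at build time; an illustrative (not prescriptive) example:
#   docker build . -f Dockerfile.ci --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bullseye"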
##############################################################################################
# This is the scripts image where we keep all inlined bash scripts needed by the other build stages.
# We use PYTHON_BASE_IMAGE so that the script layers are built and cached separately per platform.
##############################################################################################
FROM ${PYTHON_BASE_IMAGE} as scripts
##############################################################################################
# Please DO NOT modify the inlined scripts manually. The content of those files will be
# replaced by pre-commit automatically from the "scripts/docker/" folder.
# This is done in order to avoid problems with caching and file permissions and in order to
# make the PROD Dockerfile standalone
##############################################################################################
# The content below is automatically copied from scripts/docker/install_os_dependencies.sh
COPY <<"EOF" /install_os_dependencies.sh
set -euo pipefail
DOCKER_CLI_VERSION=20.10.9
if [[ "$#" != 1 ]]; then
echo "ERROR! There should be 'runtime' or 'dev' parameter passed as argument.".
exit 1
fi
if [[ "${1}" == "runtime" ]]; then
INSTALLATION_TYPE="RUNTIME"
elif [[ "${1}" == "dev" ]]; then
INSTALLATION_TYPE="dev"
else
echo "ERROR! Wrong argument. Passed ${1} and it should be one of 'runtime' or 'dev'.".
exit 1
fi
function get_dev_apt_deps() {
if [[ "${DEV_APT_DEPS=}" == "" ]]; then
DEV_APT_DEPS="apt-transport-https apt-utils build-essential ca-certificates dirmngr \
freetds-bin freetds-dev git gosu graphviz graphviz-dev krb5-user ldap-utils libffi-dev libgeos-dev \
libkrb5-dev libldap2-dev libleveldb1d libleveldb-dev libsasl2-2 libsasl2-dev libsasl2-modules \
libssl-dev locales lsb-release openssh-client sasl2-bin \
software-properties-common sqlite3 sudo unixodbc unixodbc-dev"
export DEV_APT_DEPS
fi
}
function get_runtime_apt_deps() {
if [[ "${RUNTIME_APT_DEPS=}" == "" ]]; then
RUNTIME_APT_DEPS="apt-transport-https apt-utils ca-certificates \
curl dumb-init freetds-bin gosu krb5-user \
ldap-utils libffi7 libldap-2.4-2 libsasl2-2 libsasl2-modules libssl1.1 locales \
lsb-release netcat openssh-client python3-selinux rsync sasl2-bin sqlite3 sudo unixodbc"
export RUNTIME_APT_DEPS
fi
}
function install_docker_cli() {
local platform
if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then
platform="aarch64"
else
platform="x86_64"
fi
curl --silent \
"https://download.docker.com/linux/static/stable/${platform}/docker-${DOCKER_CLI_VERSION}.tgz" \
| tar -C /usr/bin --strip-components=1 -xvzf - docker/docker
}
function install_debian_dev_dependencies() {
apt-get update
apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1
apt-get install -y --no-install-recommends curl gnupg2 lsb-release
# shellcheck disable=SC2086
export ${ADDITIONAL_DEV_APT_ENV?}
if [[ ${DEV_APT_COMMAND} != "" ]]; then
bash -o pipefail -o errexit -o nounset -o nolog -c "${DEV_APT_COMMAND}"
fi
if [[ ${ADDITIONAL_DEV_APT_COMMAND} != "" ]]; then
bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_DEV_APT_COMMAND}"
fi
apt-get update
# shellcheck disable=SC2086
apt-get install -y --no-install-recommends ${DEV_APT_DEPS} ${ADDITIONAL_DEV_APT_DEPS}
}
function install_debian_runtime_dependencies() {
apt-get update
apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1
apt-get install -y --no-install-recommends curl gnupg2 lsb-release
# shellcheck disable=SC2086
export ${ADDITIONAL_RUNTIME_APT_ENV?}
if [[ "${RUNTIME_APT_COMMAND}" != "" ]]; then
bash -o pipefail -o errexit -o nounset -o nolog -c "${RUNTIME_APT_COMMAND}"
fi
if [[ "${ADDITIONAL_RUNTIME_APT_COMMAND}" != "" ]]; then
bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_RUNTIME_APT_COMMAND}"
fi
apt-get update
# shellcheck disable=SC2086
apt-get install -y --no-install-recommends ${RUNTIME_APT_DEPS} ${ADDITIONAL_RUNTIME_APT_DEPS}
apt-get autoremove -yqq --purge
apt-get clean
rm -rf /var/lib/apt/lists/* /var/log/*
}
if [[ "${INSTALLATION_TYPE}" == "RUNTIME" ]]; then
get_runtime_apt_deps
install_debian_runtime_dependencies
install_docker_cli
else
get_dev_apt_deps
install_debian_dev_dependencies
install_docker_cli
fi
EOF
# The content below is automatically copied from scripts/docker/install_mysql.sh
COPY <<"EOF" /install_mysql.sh
set -euo pipefail
declare -a packages
MYSQL_VERSION="8.0"
readonly MYSQL_VERSION
MARIADB_VERSION="10.5"
readonly MARIADB_VERSION
COLOR_BLUE=$'\e[34m'
readonly COLOR_BLUE
COLOR_YELLOW=$'\e[1;33m'
readonly COLOR_YELLOW
COLOR_RESET=$'\e[0m'
readonly COLOR_RESET
: "${INSTALL_MYSQL_CLIENT:?Should be true or false}"
install_mysql_client() {
if [[ "${1}" == "dev" ]]; then
packages=("libmysqlclient-dev" "mysql-client")
elif [[ "${1}" == "prod" ]]; then
packages=("libmysqlclient21" "mysql-client")
else
echo
echo "Specify either prod or dev"
echo
exit 1
fi
echo
echo "${COLOR_BLUE}Installing mysql client version ${MYSQL_VERSION}: ${1}${COLOR_RESET}"
echo
local key="467B942D3A79BD29"
readonly key
GNUPGHOME="$(mktemp -d)"
export GNUPGHOME
set +e
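# Best-effort key fetch: try the (shuffled) keyservers one by one and stop at the first
# success - individual failures are tolerated here because of the "set +e" above.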
for keyserver in $(shuf -e ha.pool.sks-keyservers.net hkp://p80.pool.sks-keyservers.net:80 \
keyserver.ubuntu.com hkp://keyserver.ubuntu.com:80)
do
gpg --keyserver "${keyserver}" --recv-keys "${key}" 2>&1 && break
done
set -e
gpg --export "${key}" > /etc/apt/trusted.gpg.d/mysql.gpg
gpgconf --kill all
rm -rf "${GNUPGHOME}"
unset GNUPGHOME
echo "deb http://repo.mysql.com/apt/debian/ $(lsb_release -cs) mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list
apt-get update
apt-get install --no-install-recommends -y "${packages[@]}"
apt-get autoremove -yqq --purge
apt-get clean && rm -rf /var/lib/apt/lists/*
}
install_mariadb_client() {
if [[ "${1}" == "dev" ]]; then
packages=("libmariadb-dev" "mariadb-client-core-${MARIADB_VERSION}")
elif [[ "${1}" == "prod" ]]; then
packages=("mariadb-client-core-${MARIADB_VERSION}")
else
echo
echo "Specify either prod or dev"
echo
exit 1
fi
echo
echo "${COLOR_BLUE}Installing MariaDB client version ${MARIADB_VERSION}: ${1}${COLOR_RESET}"
echo "${COLOR_YELLOW}MariaDB client binary compatible with MySQL client.${COLOR_RESET}"
echo
apt-get update
apt-get install --no-install-recommends -y "${packages[@]}"
apt-get autoremove -yqq --purge
apt-get clean && rm -rf /var/lib/apt/lists/*
}
if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then
if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then
install_mariadb_client "${@}"
else
install_mysql_client "${@}"
fi
fi
EOF
# The content below is automatically copied from scripts/docker/install_mssql.sh
COPY <<"EOF" /install_mssql.sh
set -euo pipefail
: "${INSTALL_MSSQL_CLIENT:?Should be true or false}"
COLOR_BLUE=$'\e[34m'
readonly COLOR_BLUE
COLOR_RESET=$'\e[0m'
readonly COLOR_RESET
function install_mssql_client() {
# Install MsSQL client from Microsoft repositories
if [[ ${INSTALL_MSSQL_CLIENT:="true"} != "true" ]]; then
echo
echo "${COLOR_BLUE}Skip installing mssql client${COLOR_RESET}"
echo
return
fi
echo
echo "${COLOR_BLUE}Installing mssql client${COLOR_RESET}"
echo
local distro
local version
distro=$(lsb_release -is | tr '[:upper:]' '[:lower:]')
version=$(lsb_release -rs)
local driver=msodbcsql18
curl --silent https://packages.microsoft.com/keys/microsoft.asc | apt-key add - >/dev/null 2>&1
curl --silent "https://packages.microsoft.com/config/${distro}/${version}/prod.list" > \
/etc/apt/sources.list.d/mssql-release.list
apt-get update -yqq
apt-get upgrade -yqq
ACCEPT_EULA=Y apt-get -yqq install --no-install-recommends "${driver}"
apt-get autoremove -yqq --purge
apt-get clean && rm -rf /var/lib/apt/lists/*
}
install_mssql_client "${@}"
EOF
# The content below is automatically copied from scripts/docker/install_postgres.sh
COPY <<"EOF" /install_postgres.sh
set -euo pipefail
declare -a packages
COLOR_BLUE=$'\e[34m'
readonly COLOR_BLUE
COLOR_RESET=$'\e[0m'
readonly COLOR_RESET
: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}"
install_postgres_client() {
echo
echo "${COLOR_BLUE}Installing postgres client${COLOR_RESET}"
echo
if [[ "${1}" == "dev" ]]; then
packages=("libpq-dev" "postgresql-client")
elif [[ "${1}" == "prod" ]]; then
packages=("postgresql-client")
else
echo
echo "Specify either prod or dev"
echo
exit 1
fi
curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -
echo "deb https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list
apt-get update
apt-get install --no-install-recommends -y "${packages[@]}"
apt-get autoremove -yqq --purge
apt-get clean && rm -rf /var/lib/apt/lists/*
}
if [[ ${INSTALL_POSTGRES_CLIENT:="true"} == "true" ]]; then
install_postgres_client "${@}"
fi
EOF
# The content below is automatically copied from scripts/docker/install_pip_version.sh
COPY <<"EOF" /install_pip_version.sh
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
: "${AIRFLOW_PIP_VERSION:?Should be set}"
common::get_colors
common::get_airflow_version_specification
common::override_pip_version_if_needed
common::show_pip_version_and_location
common::install_pip_version
EOF
# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh
COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
: "${AIRFLOW_REPO:?Should be set}"
: "${AIRFLOW_BRANCH:?Should be set}"
: "${INSTALL_MYSQL_CLIENT:?Should be true or false}"
: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}"
: "${AIRFLOW_PIP_VERSION:?Should be set}"
function install_airflow_dependencies_from_branch_tip() {
echo
echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. It is used to cache dependencies${COLOR_RESET}"
echo
if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,}
fi
if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,}
fi
# Install latest set of dependencies using constraints. In case constraints were upgraded and there
# are conflicts, this might fail, but it should be fixed in the following installation steps
set -x
pip install --root-user-action ignore \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true
common::install_pip_version
pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true
set +x
echo
echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}"
echo
pip uninstall --yes apache-airflow || true
}
common::get_colors
common::get_airflow_version_specification
common::override_pip_version_if_needed
common::get_constraints_location
common::show_pip_version_and_location
install_airflow_dependencies_from_branch_tip
EOF
# The content below is automatically copied from scripts/docker/common.sh
COPY <<"EOF" /common.sh
set -euo pipefail
function common::get_colors() {
COLOR_BLUE=$'\e[34m'
COLOR_GREEN=$'\e[32m'
COLOR_RED=$'\e[31m'
COLOR_RESET=$'\e[0m'
COLOR_YELLOW=$'\e[33m'
export COLOR_BLUE
export COLOR_GREEN
export COLOR_RED
export COLOR_RESET
export COLOR_YELLOW
}
function common::get_airflow_version_specification() {
if [[ -z ${AIRFLOW_VERSION_SPECIFICATION=}
&& -n ${AIRFLOW_VERSION}
&& ${AIRFLOW_INSTALLATION_METHOD} != "." ]]; then
AIRFLOW_VERSION_SPECIFICATION="==${AIRFLOW_VERSION}"
fi
}
function common::override_pip_version_if_needed() {
if [[ -n ${AIRFLOW_VERSION} ]]; then
if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then
export AIRFLOW_PIP_VERSION="23.1.2"
fi
fi
}
function common::get_constraints_location() {
# Auto-detect the Airflow constraints reference and location
if [[ -z "${AIRFLOW_CONSTRAINTS_REFERENCE=}" ]]; then
if [[ ${AIRFLOW_VERSION} =~ v?2.* && ! ${AIRFLOW_VERSION} =~ .*dev.* ]]; then
AIRFLOW_CONSTRAINTS_REFERENCE=constraints-${AIRFLOW_VERSION}
else
AIRFLOW_CONSTRAINTS_REFERENCE=${DEFAULT_CONSTRAINTS_BRANCH}
fi
fi
if [[ -z ${AIRFLOW_CONSTRAINTS_LOCATION=} ]]; then
local constraints_base="https://raw.githubusercontent.com/${CONSTRAINTS_GITHUB_REPOSITORY}/${AIRFLOW_CONSTRAINTS_REFERENCE}"
local python_version
python_version="$(python --version 2>/dev/stdout | cut -d " " -f 2 | cut -d "." -f 1-2)"
AIRFLOW_CONSTRAINTS_LOCATION="${constraints_base}/${AIRFLOW_CONSTRAINTS_MODE}-${python_version}.txt"
fi
}
function common::show_pip_version_and_location() {
echo "PATH=${PATH}"
echo "pip on path: $(which pip)"
echo "Using pip: $(pip --version)"
}
function common::install_pip_version() {
echo
echo "${COLOR_BLUE}Installing pip version ${AIRFLOW_PIP_VERSION}${COLOR_RESET}"
echo
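# AIRFLOW_PIP_VERSION may be either a plain version (e.g. "23.1.2") or a URL to a pip
# distribution (detected below by the "https" substring) - it is installed accordingly.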
if [[ ${AIRFLOW_PIP_VERSION} =~ .*https.* ]]; then
pip install --disable-pip-version-check --no-cache-dir "pip @ ${AIRFLOW_PIP_VERSION}"
else
pip install --disable-pip-version-check --no-cache-dir "pip==${AIRFLOW_PIP_VERSION}"
fi
mkdir -p "${HOME}/.local/bin"
}
EOF
# The content below is automatically copied from scripts/docker/install_pipx_tools.sh
COPY <<"EOF" /install_pipx_tools.sh
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
function install_pipx_tools() {
echo
echo "${COLOR_BLUE}Installing pipx tools${COLOR_RESET}"
echo
# Make sure pipx is installed in its latest version
pip install --root-user-action ignore --upgrade pipx
if [[ $(uname -m) != "aarch64" ]]; then
# Do not install mssql-cli for ARM
# Install all the tools we need available in command line but without impacting the current environment
pipx install mssql-cli
# Unfortunately mssql-cli installed by `pipx` does not work out of the box because it uses
# its own execution bash script which is not compliant with the auto-activation of
# pipx venvs - we need to manually patch the Python executable in the script to fix it: ¯\_(ツ)_/¯
sed "s/python /\/root\/\.local\/pipx\/venvs\/mssql-cli\/bin\/python /" -i /root/.local/bin/mssql-cli
fi
}
common::get_colors
install_pipx_tools
EOF
# The content below is automatically copied from scripts/docker/install_airflow.sh
COPY <<"EOF" /install_airflow.sh
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
: "${AIRFLOW_PIP_VERSION:?Should be set}"
function install_airflow() {
# Coherence check for editable installation mode.
if [[ ${AIRFLOW_INSTALLATION_METHOD} != "." && \
${AIRFLOW_INSTALL_EDITABLE_FLAG} == "--editable" ]]; then
echo
echo "${COLOR_RED}ERROR! You can only use --editable flag when installing airflow from sources!${COLOR_RESET}"
echo "${COLOR_RED} Current installation method is '${AIRFLOW_INSTALLATION_METHOD} and should be '.'${COLOR_RESET}"
exit 1
fi
# Remove mysql from extras if client is not going to be installed
if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,}
echo "${COLOR_YELLOW}MYSQL client installation is disabled. Extra 'mysql' installations were therefore omitted.${COLOR_RESET}"
fi
# Remove postgres from extras if client is not going to be installed
if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,}
echo "${COLOR_YELLOW}Postgres client installation is disabled. Extra 'postgres' installations were therefore omitted.${COLOR_RESET}"
fi
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then
echo
echo "${COLOR_BLUE}Installing all packages with eager upgrade${COLOR_RESET}"
echo
# eager upgrade
pip install --root-user-action ignore --upgrade --upgrade-strategy eager \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
"${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \
${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS}
if [[ -n "${AIRFLOW_INSTALL_EDITABLE_FLAG}" ]]; then
# Remove airflow and reinstall it using editable flag
# We can only do it when we install airflow from sources
set -x
pip uninstall apache-airflow --yes
pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
"${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}"
set +x
fi
common::install_pip_version
echo
echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}"
echo
pip check
else
echo
echo "${COLOR_BLUE}Installing all packages with constraints and upgrade if needed${COLOR_RESET}"
echo
set -x
pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
"${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"
common::install_pip_version
# then upgrade if needed without using constraints to account for new limits in setup.py
pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
${AIRFLOW_INSTALL_EDITABLE_FLAG} \
"${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}"
common::install_pip_version
set +x
echo
echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}"
echo
pip check
fi
}
common::get_colors
common::get_airflow_version_specification
common::override_pip_version_if_needed
common::get_constraints_location
common::show_pip_version_and_location
install_airflow
EOF
# The content below is automatically copied from scripts/docker/install_additional_dependencies.sh
COPY <<"EOF" /install_additional_dependencies.sh
set -euo pipefail
: "${UPGRADE_TO_NEWER_DEPENDENCIES:?Should be true or false}"
: "${ADDITIONAL_PYTHON_DEPS:?Should be set}"
: "${AIRFLOW_PIP_VERSION:?Should be set}"
. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
function install_additional_dependencies() {
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then
echo
echo "${COLOR_BLUE}Installing additional dependencies while upgrading to newer dependencies${COLOR_RESET}"
echo
set -x
pip install --root-user-action ignore --upgrade --upgrade-strategy eager \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
${ADDITIONAL_PYTHON_DEPS} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS}
common::install_pip_version
set +x
echo
echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}"
echo
pip check
else
echo
echo "${COLOR_BLUE}Installing additional dependencies upgrading only if needed${COLOR_RESET}"
echo
set -x
pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
${ADDITIONAL_PYTHON_DEPS}
common::install_pip_version
set +x
echo
echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}"
echo
pip check
fi
}
common::get_colors
common::get_airflow_version_specification
common::override_pip_version_if_needed
common::get_constraints_location
common::show_pip_version_and_location
install_additional_dependencies
EOF
# The content below is automatically copied from scripts/docker/entrypoint_ci.sh
COPY <<"EOF" /entrypoint_ci.sh
#!/usr/bin/env bash
if [[ ${VERBOSE_COMMANDS:="false"} == "true" ]]; then
set -x
fi
. "${AIRFLOW_SOURCES:-/opt/airflow}"/scripts/in_container/_in_container_script_init.sh
LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6"
export LD_PRELOAD
chmod 1777 /tmp
AIRFLOW_SOURCES=$(cd "${IN_CONTAINER_DIR}/../.." || exit 1; pwd)
PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:=3.7}
export AIRFLOW_HOME=${AIRFLOW_HOME:=${HOME}}
: "${AIRFLOW_SOURCES:?"ERROR: AIRFLOW_SOURCES not set !!!!"}"
function wait_for_asset_compilation() {
if [[ -f "${AIRFLOW_SOURCES}/.build/www/.asset_compile.lock" ]]; then
echo
echo "${COLOR_YELLOW}Waiting for asset compilation to complete in the background.${COLOR_RESET}"
echo
local counter=0
while [[ -f "${AIRFLOW_SOURCES}/.build/www/.asset_compile.lock" ]]; do
if (( counter % 5 == 2 )); then
echo "${COLOR_BLUE}Still waiting .....${COLOR_RESET}"
fi
sleep 1
((counter=counter+1))
if [[ ${counter} == "30" ]]; then
echo
echo "${COLOR_YELLOW}The asset compilation is taking too long.${COLOR_YELLOW}"
echo """
If it does not complete soon, you might want to stop it and remove file lock:
* press Ctrl-C
* run 'rm ${AIRFLOW_SOURCES}/.build/www/.asset_compile.lock'
"""
fi
if [[ ${counter} == "60" ]]; then
echo
echo "${COLOR_RED}The asset compilation is taking too long. Exiting.${COLOR_RED}"
echo
exit 1
fi
done
fi
if [ -f "${AIRFLOW_SOURCES}/.build/www/asset_compile.out" ]; then
echo
echo "${COLOR_RED}The asset compilation failed. Exiting.${COLOR_RESET}"
echo
cat "${AIRFLOW_SOURCES}/.build/www/asset_compile.out"
rm "${AIRFLOW_SOURCES}/.build/www/asset_compile.out"
echo
exit 1
fi
}
if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} != "true" ]]; then
if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then
if [[ ${BACKEND:=} == "mssql" ]]; then
echo "${COLOR_RED}ARM platform is not supported for ${BACKEND} backend. Exiting.${COLOR_RESET}"
exit 1
fi
fi
echo
echo "${COLOR_BLUE}Running Initialization. Your basic configuration is:${COLOR_RESET}"
echo
echo " * ${COLOR_BLUE}Airflow home:${COLOR_RESET} ${AIRFLOW_HOME}"
echo " * ${COLOR_BLUE}Airflow sources:${COLOR_RESET} ${AIRFLOW_SOURCES}"
echo " * ${COLOR_BLUE}Airflow core SQL connection:${COLOR_RESET} ${AIRFLOW__CORE__SQL_ALCHEMY_CONN:=}"
echo
RUN_TESTS=${RUN_TESTS:="false"}
CI=${CI:="false"}
USE_AIRFLOW_VERSION="${USE_AIRFLOW_VERSION:=""}"
TEST_TIMEOUT=${TEST_TIMEOUT:="60"}
if [[ ${USE_AIRFLOW_VERSION} == "" ]]; then
export PYTHONPATH=${AIRFLOW_SOURCES}
echo
echo "${COLOR_BLUE}Using airflow version from current sources${COLOR_RESET}"
echo
# Cleanup the logs, tmp when entering the environment
sudo rm -rf "${AIRFLOW_SOURCES}"/logs/*
sudo rm -rf "${AIRFLOW_SOURCES}"/tmp/*
mkdir -p "${AIRFLOW_SOURCES}"/logs/
mkdir -p "${AIRFLOW_SOURCES}"/tmp/
elif [[ ${USE_AIRFLOW_VERSION} == "none" ]]; then
echo
echo "${COLOR_BLUE}Skip installing airflow - only install wheel/tar.gz packages that are present locally.${COLOR_RESET}"
echo
echo
echo "${COLOR_BLUE}Uninstalling airflow and providers"
echo
uninstall_airflow_and_providers
elif [[ ${USE_AIRFLOW_VERSION} == "wheel" ]]; then
echo
echo "${COLOR_BLUE}Uninstalling airflow and providers"
echo
uninstall_airflow_and_providers
if [[ ${SKIP_CONSTRAINTS,,=} == "true" ]]; then
echo "${COLOR_BLUE}Install airflow from wheel package with extras: '${AIRFLOW_EXTRAS}' with no constraints.${COLOR_RESET}"
echo
install_airflow_from_wheel "${AIRFLOW_EXTRAS}" "none"
else
echo "${COLOR_BLUE}Install airflow from wheel package with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}"
echo
install_airflow_from_wheel "${AIRFLOW_EXTRAS}" "${AIRFLOW_CONSTRAINTS_REFERENCE}"
fi
uninstall_providers
elif [[ ${USE_AIRFLOW_VERSION} == "sdist" ]]; then
echo
echo "${COLOR_BLUE}Uninstalling airflow and providers"
echo
uninstall_airflow_and_providers
echo
if [[ ${SKIP_CONSTRAINTS,,=} == "true" ]]; then
echo "${COLOR_BLUE}Install airflow from sdist package with extras: '${AIRFLOW_EXTRAS}' with no constraints.${COLOR_RESET}"
echo
install_airflow_from_sdist "${AIRFLOW_EXTRAS}" "none"
else
echo "${COLOR_BLUE}Install airflow from sdist package with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}"
echo
install_airflow_from_sdist "${AIRFLOW_EXTRAS}" "${AIRFLOW_CONSTRAINTS_REFERENCE}"
fi
uninstall_providers
else
echo
echo "${COLOR_BLUE}Uninstalling airflow and providers"
echo
uninstall_airflow_and_providers
echo
if [[ ${SKIP_CONSTRAINTS,,=} == "true" ]]; then
echo "${COLOR_BLUE}Install released airflow from PyPI with extras: '${AIRFLOW_EXTRAS}' with no constraints.${COLOR_RESET}"
echo
install_released_airflow_version "${USE_AIRFLOW_VERSION}" "none"
else
echo "${COLOR_BLUE}Install released airflow from PyPI with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}"
echo
install_released_airflow_version "${USE_AIRFLOW_VERSION}" "${AIRFLOW_CONSTRAINTS_REFERENCE}"
fi
if [[ "${USE_AIRFLOW_VERSION}" =~ ^2\.2\..*|^2\.1\..*|^2\.0\..* && "${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=}" != "" ]]; then
# make sure old variable is used for older airflow versions
export AIRFLOW__CORE__SQL_ALCHEMY_CONN="${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN}"
fi
fi
if [[ ${USE_PACKAGES_FROM_DIST=} == "true" ]]; then
echo
echo "${COLOR_BLUE}Install all packages from dist folder${COLOR_RESET}"
if [[ ${USE_AIRFLOW_VERSION} == "wheel" ]]; then
echo "${COLOR_BLUE}(except apache-airflow)${COLOR_RESET}"
fi
if [[ ${PACKAGE_FORMAT} == "both" ]]; then
echo
echo "${COLOR_RED}ERROR:You can only specify 'wheel' or 'sdist' as PACKAGE_FORMAT not 'both'.${COLOR_RESET}"
echo
exit 1
fi
echo
if [[ ${INSTALL_SELECTED_PROVIDERS=} != "" ]]; then
IFS=\, read -ra selected_providers <<<"${INSTALL_SELECTED_PROVIDERS}"
echo
echo "${COLOR_BLUE}Selected providers to install: '${selected_providers[*]}'${COLOR_RESET}"
echo
else
echo
echo "${COLOR_BLUE}Installing all found providers${COLOR_RESET}"
echo
selected_providers=()
fi
installable_files=()
for file in /dist/*.{whl,tar.gz}
do
if [[ ${file} == "/dist/apache?airflow-[0-9]"* ]]; then
# Skip Apache Airflow package - it's just been installed above if
# --use-airflow-version was set and should be skipped otherwise
echo "${COLOR_BLUE}Skipping airflow core package ${file} from provider installation.${COLOR_RESET}"
continue
fi
if [[ ${PACKAGE_FORMAT} == "wheel" && ${file} == *".whl" ]]; then
provider_name=$(echo "${file}" | sed 's/\/dist\/apache_airflow_providers_//' | sed 's/-[0-9].*//' | sed 's/-/./g')
if [[ ${INSTALL_SELECTED_PROVIDERS=} != "" ]]; then
# shellcheck disable=SC2076
if [[ " ${selected_providers[*]} " =~ " ${provider_name} " ]]; then
echo "${COLOR_BLUE}Adding ${provider_name} to install via ${file}${COLOR_RESET}"
installable_files+=( "${file}" )
else
echo "${COLOR_BLUE}Skipping ${provider_name} as it is not in the list of '${selected_providers[*]}'${COLOR_RESET}"
fi
else
echo "${COLOR_BLUE}Adding ${provider_name} to install via ${file}${COLOR_RESET}"
installable_files+=( "${file}" )
fi
fi
if [[ ${PACKAGE_FORMAT} == "sdist" && ${file} == *".tar.gz" ]]; then
provider_name=$(echo "${file}" | sed 's/\/dist\/apache-airflow-providers-//' | sed 's/-[0-9].*//' | sed 's/-/./g')
if [[ ${INSTALL_SELECTED_PROVIDERS=} != "" ]]; then
# shellcheck disable=SC2076
if [[ " ${selected_providers[*]} " =~ " ${provider_name} " ]]; then
echo "${COLOR_BLUE}Adding ${provider_name} to install via ${file}${COLOR_RESET}"
installable_files+=( "${file}" )
else
echo "${COLOR_BLUE}Skipping ${provider_name} as it is not in the list of '${selected_providers[*]}'${COLOR_RESET}"
fi
else
echo "${COLOR_BLUE}Adding ${provider_name} to install via ${file}${COLOR_RESET}"
installable_files+=( "${file}" )
fi
fi
done
if [[ ${USE_AIRFLOW_VERSION} != "wheel" && ${USE_AIRFLOW_VERSION} != "sdist" && ${USE_AIRFLOW_VERSION} != "none" ]]; then
echo
echo "${COLOR_BLUE}Also adding airflow in specified version ${USE_AIRFLOW_VERSION} to make sure it is not upgraded by >= limits${COLOR_RESET}"
echo
installable_files+=( "apache-airflow==${USE_AIRFLOW_VERSION}" )
fi
echo
echo "${COLOR_BLUE}Installing: ${installable_files[*]}${COLOR_RESET}"
echo
if (( ${#installable_files[@]} )); then
pip install --root-user-action ignore "${installable_files[@]}"
fi
fi
# Added to have run-tests on path
export PATH=${PATH}:${AIRFLOW_SOURCES}
# This is now set in conftest.py - only for pytest tests
unset AIRFLOW__CORE__UNIT_TEST_MODE
mkdir -pv "${AIRFLOW_HOME}/logs/"
cp -f "${IN_CONTAINER_DIR}/airflow_ci.cfg" "${AIRFLOW_HOME}/unittests.cfg"
# Change the default worker_concurrency for tests
export AIRFLOW__CELERY__WORKER_CONCURRENCY=8
set +e
"${IN_CONTAINER_DIR}/check_environment.sh"
ENVIRONMENT_EXIT_CODE=$?
set -e
if [[ ${ENVIRONMENT_EXIT_CODE} != 0 ]]; then
echo
echo "Error: check_environment returned ${ENVIRONMENT_EXIT_CODE}. Exiting."
echo
exit ${ENVIRONMENT_EXIT_CODE}
fi
mkdir -p /usr/lib/google-cloud-sdk/bin
touch /usr/lib/google-cloud-sdk/bin/gcloud
ln -s -f /usr/bin/gcloud /usr/lib/google-cloud-sdk/bin/gcloud
in_container_fix_ownership
if [[ ${SKIP_SSH_SETUP="false"} == "false" ]]; then
# Set up ssh keys
echo 'yes' | ssh-keygen -t rsa -C your_email@youremail.com -m PEM -P '' -f ~/.ssh/id_rsa \
>"${AIRFLOW_HOME}/logs/ssh-keygen.log" 2>&1
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ln -s -f ~/.ssh/authorized_keys ~/.ssh/authorized_keys2
chmod 600 ~/.ssh/*
# SSH Service
sudo service ssh restart >/dev/null 2>&1
# Sometimes the server is not quick enough to load the keys!
while [[ $(ssh-keyscan -H localhost 2>/dev/null | wc -l) != "3" ]] ; do
echo "Not all keys yet loaded by the server"
sleep 0.05
done
ssh-keyscan -H localhost >> ~/.ssh/known_hosts 2>/dev/null
fi
# shellcheck source=scripts/in_container/configure_environment.sh
. "${IN_CONTAINER_DIR}/configure_environment.sh"
# shellcheck source=scripts/in_container/run_init_script.sh
. "${IN_CONTAINER_DIR}/run_init_script.sh"
cd "${AIRFLOW_SOURCES}"
if [[ ${START_AIRFLOW:="false"} == "true" || ${START_AIRFLOW} == "True" ]]; then
export AIRFLOW__DATABASE__LOAD_DEFAULT_CONNECTIONS=${LOAD_DEFAULT_CONNECTIONS}
export AIRFLOW__CORE__LOAD_EXAMPLES=${LOAD_EXAMPLES}
wait_for_asset_compilation
# shellcheck source=scripts/in_container/bin/run_tmux
exec run_tmux
fi
fi
rm -f "${AIRFLOW_SOURCES}/pytest.ini"
set +u
if [[ "${RUN_TESTS}" != "true" ]]; then
exec /bin/bash "${@}"
fi
set -u
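# ${TEST_TYPE/\[*\]/} below strips an optional "[...]" selector from the test type,
# e.g. (illustrative) "Providers[google]" becomes "Providers" in the result file names.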
if [[ ${HELM_TEST_PACKAGE=} != "" ]]; then
export RESULT_LOG_FILE="/files/test_result-${TEST_TYPE/\[*\]/}-${HELM_TEST_PACKAGE}-${BACKEND}.xml"
export WARNINGS_FILE="/files/warnings-${TEST_TYPE/\[*\]/}-${HELM_TEST_PACKAGE}-${BACKEND}.txt"
else
export RESULT_LOG_FILE="/files/test_result-${TEST_TYPE/\[*\]/}-${BACKEND}.xml"
export WARNINGS_FILE="/files/warnings-${TEST_TYPE/\[*\]/}-${BACKEND}.txt"
fi
EXTRA_PYTEST_ARGS=(
"--verbosity=0"
"--strict-markers"
"--durations=100"
"--maxfail=50"
"--color=yes"
"--junitxml=${RESULT_LOG_FILE}"
# timeouts in seconds for individual tests
"--timeouts-order"
"moi"
"--setup-timeout=${TEST_TIMEOUT}"
"--execution-timeout=${TEST_TIMEOUT}"
"--teardown-timeout=${TEST_TIMEOUT}"
"--output=${WARNINGS_FILE}"
"--disable-warnings"
# Only display summary for non-expected cases
#
# f - failed
# E - error
# X - xpassed (passed even if expected to fail)
# s - skipped
#
# The following cases are not displayed:
# x - xfailed (expected to fail and failed)
# p - passed
# P - passed with output
#
"-rfEXs"
)
if [[ ${SUSPENDED_PROVIDERS_FOLDERS=} != "" ]]; then
for provider in ${SUSPENDED_PROVIDERS_FOLDERS=}; do
echo "Skipping tests for suspended provider: ${provider}"
EXTRA_PYTEST_ARGS+=(
"--ignore=tests/providers/${provider}"
"--ignore=tests/system/providers/${provider}"
"--ignore=tests/integration/providers/${provider}"
)
done
fi
if [[ "${TEST_TYPE}" == "Helm" ]]; then
_cpus="$(grep -c 'cpu[0-9]' /proc/stat)"
echo "Running tests with ${_cpus} CPUs in parallel"
# Enable parallelism and disable coverage
EXTRA_PYTEST_ARGS+=(
"-n" "${_cpus}"
"--no-cov"
)
else
EXTRA_PYTEST_ARGS+=(
"--with-db-init"
)
fi
if [[ ${ENABLE_TEST_COVERAGE:="false"} == "true" ]]; then
EXTRA_PYTEST_ARGS+=(
"--cov=airflow"
"--cov-config=.coveragerc"
"--cov-report=xml:/files/coverage-${TEST_TYPE/\[*\]/}-${BACKEND}.xml"
)
fi
if [[ ${COLLECT_ONLY:="false"} == "true" ]]; then
EXTRA_PYTEST_ARGS+=(
"--collect-only"
"-qqqq"
"--disable-warnings"
)
fi
if [[ ${REMOVE_ARM_PACKAGES:="false"} == "true" ]]; then
# Test what happens if we do not have ARM packages installed.
# This is useful to check that pytest collection works without ARM packages, which is important
# for macOS M1 users running tests on their ARM machines with the `breeze testing tests` command
python "${IN_CONTAINER_DIR}/remove_arm_packages.py"
fi
declare -a SELECTED_TESTS CLI_TESTS API_TESTS PROVIDERS_TESTS CORE_TESTS WWW_TESTS \
ALL_TESTS ALL_PRESELECTED_TESTS ALL_OTHER_TESTS
function find_all_other_tests() {
local all_tests_dirs
# The output of the find command should be sorted to make sure that the order is always the same
# when we run the tests, to avoid cross-package side effects causing different test results
# in different environments. See https://github.com/apache/airflow/pull/30588 for example.
all_tests_dirs=$(find "tests" -type d ! -name '__pycache__' | sort)
all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests$/d" )
all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests\/dags/d" )
local path
for path in "${ALL_PRESELECTED_TESTS[@]}"
do
escaped_path="${path//\//\\\/}"
all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/${escaped_path}/d" )
done
for path in ${all_tests_dirs}
do
ALL_OTHER_TESTS+=("${path}")
done
}
if [[ ${#@} -gt 0 && -n "$1" ]]; then
SELECTED_TESTS=("${@}")
else
CLI_TESTS=("tests/cli")
API_TESTS=("tests/api_experimental" "tests/api_connexion" "tests/api_internal")
PROVIDERS_TESTS=("tests/providers")
ALWAYS_TESTS=("tests/always")
CORE_TESTS=(
"tests/core"
"tests/executors"
"tests/jobs"
"tests/models"
"tests/serialization"
"tests/ti_deps"
"tests/utils"
)
WWW_TESTS=("tests/www")
HELM_CHART_TESTS=("tests/charts")
INTEGRATION_TESTS=("tests/integration")
SYSTEM_TESTS=("tests/system")
ALL_TESTS=("tests")
ALL_PRESELECTED_TESTS=(
"${CLI_TESTS[@]}"
"${API_TESTS[@]}"
"${HELM_CHART_TESTS[@]}"
"${INTEGRATION_TESTS[@]}"
"${PROVIDERS_TESTS[@]}"
"${CORE_TESTS[@]}"
"${ALWAYS_TESTS[@]}"
"${WWW_TESTS[@]}"
"${SYSTEM_TESTS[@]}"
)
NO_PROVIDERS_INTEGRATION_TESTS=(
"tests/integration/api_experimental"
"tests/integration/cli"
"tests/integration/executors"
"tests/integration/security"
)
if [[ ${TEST_TYPE:=""} == "CLI" ]]; then
SELECTED_TESTS=("${CLI_TESTS[@]}")
elif [[ ${TEST_TYPE:=""} == "API" ]]; then
SELECTED_TESTS=("${API_TESTS[@]}")
elif [[ ${TEST_TYPE:=""} == "Providers" ]]; then
SELECTED_TESTS=("${PROVIDERS_TESTS[@]}")
elif [[ ${TEST_TYPE:=""} == "Core" ]]; then
SELECTED_TESTS=("${CORE_TESTS[@]}")
elif [[ ${TEST_TYPE:=""} == "Always" ]]; then
SELECTED_TESTS=("${ALWAYS_TESTS[@]}")
elif [[ ${TEST_TYPE:=""} == "WWW" ]]; then
SELECTED_TESTS=("${WWW_TESTS[@]}")
elif [[ ${TEST_TYPE:=""} == "Helm" ]]; then
if [[ ${HELM_TEST_PACKAGE=} != "" ]]; then
SELECTED_TESTS=("tests/charts/${HELM_TEST_PACKAGE}")
else
SELECTED_TESTS=("${HELM_CHART_TESTS[@]}")
fi
elif [[ ${TEST_TYPE:=""} == "Integration" ]]; then
if [[ ${SKIP_PROVIDER_TESTS:=""} == "true" ]]; then
SELECTED_TESTS=("${NO_PROVIDERS_INTEGRATION_TESTS[@]}")
else
SELECTED_TESTS=("${INTEGRATION_TESTS[@]}")
fi
elif [[ ${TEST_TYPE:=""} == "Other" ]]; then
find_all_other_tests
SELECTED_TESTS=("${ALL_OTHER_TESTS[@]}")
elif [[ ${TEST_TYPE:=""} == "All" || ${TEST_TYPE} == "Quarantined" || \
${TEST_TYPE} == "Always" || \
${TEST_TYPE} == "Postgres" || ${TEST_TYPE} == "MySQL" || \
${TEST_TYPE} == "Long" ]]; then
SELECTED_TESTS=("${ALL_TESTS[@]}")
elif [[ ${TEST_TYPE} =~ Providers\[\-(.*)\] ]]; then
# When providers start with `-` it means that we should run all provider tests except those
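# e.g. (illustrative) TEST_TYPE="Providers[-google,amazon]" runs all provider tests
# except those under tests/providers/google and tests/providers/amazon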
SELECTED_TESTS=("${PROVIDERS_TESTS[@]}")
for provider in ${BASH_REMATCH[1]//,/ }
do
providers_dir="tests/providers/${provider//./\/}"
if [[ -d ${providers_dir} ]]; then
echo "${COLOR_BLUE}Ignoring ${providers_dir} as it has been deselected.${COLOR_RESET}"
EXTRA_PYTEST_ARGS+=("--ignore=tests/providers/${provider//./\/}")
else
echo "${COLOR_YELLOW}Skipping ${providers_dir} as the directory does not exist.${COLOR_RESET}"
fi
done
elif [[ ${TEST_TYPE} =~ Providers\[(.*)\] ]]; then
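# e.g. (illustrative) TEST_TYPE="Providers[cncf.kubernetes]" selects only tests/providers/cncf/kubernetes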
SELECTED_TESTS=()
for provider in ${BASH_REMATCH[1]//,/ }
do
providers_dir="tests/providers/${provider//./\/}"
if [[ -d ${providers_dir} ]]; then
SELECTED_TESTS+=("${providers_dir}")
else
echo "${COLOR_YELLOW}Skip ${providers_dir} as the directory does not exist.${COLOR_RESET}"
fi
done
elif [[ ${TEST_TYPE} =~ PlainAsserts ]]; then
# Those tests fail when --assert=rewrite is set, therefore we run them separately
# with --assert=plain to make sure they pass.
SELECTED_TESTS=(
# this one is mysteriously failing dill serialization. It could be removed once
# https://github.com/pytest-dev/pytest/issues/10845 is fixed
"tests/operators/test_python.py::TestPythonVirtualenvOperator::test_airflow_context"
)
EXTRA_PYTEST_ARGS+=("--assert=plain")
export PYTEST_PLAIN_ASSERTS="true"
else
echo
echo "${COLOR_RED}ERROR: Wrong test type ${TEST_TYPE} ${COLOR_RESET}"
echo
exit 1
fi
fi
if [[ ${UPGRADE_BOTO=} == "true" ]]; then
echo
echo "${COLOR_BLUE}Upgrading boto3, botocore to latest version to run Amazon tests with them${COLOR_RESET}"
echo
pip uninstall aiobotocore -y || true
pip install --upgrade boto3 botocore
fi
readonly SELECTED_TESTS CLI_TESTS API_TESTS PROVIDERS_TESTS CORE_TESTS WWW_TESTS \
ALL_TESTS ALL_PRESELECTED_TESTS
if [[ ${TEST_TYPE:=""} == "Long" ]]; then
EXTRA_PYTEST_ARGS+=(
"-m" "long_running"
"--include-long-running"
)
elif [[ ${TEST_TYPE:=""} == "Postgres" ]]; then
EXTRA_PYTEST_ARGS+=(
"--backend"
"postgres"
)
elif [[ ${TEST_TYPE:=""} == "MySQL" ]]; then
EXTRA_PYTEST_ARGS+=(
"--backend"
"mysql"
)
elif [[ ${TEST_TYPE:=""} == "Quarantined" ]]; then
EXTRA_PYTEST_ARGS+=(
"-m" "quarantined"
"--include-quarantined"
)
fi
echo
echo "Running tests ${SELECTED_TESTS[*]}"
echo
ARGS=("${EXTRA_PYTEST_ARGS[@]}" "${SELECTED_TESTS[@]}")
if [[ ${RUN_SYSTEM_TESTS:="false"} == "true" ]]; then
"${IN_CONTAINER_DIR}/run_system_tests.sh" "${ARGS[@]}"
else
"${IN_CONTAINER_DIR}/run_ci_tests.sh" "${ARGS[@]}"
fi
EOF
# The content below is automatically copied from scripts/docker/entrypoint_exec.sh
COPY <<"EOF" /entrypoint_exec.sh
#!/usr/bin/env bash
. /opt/airflow/scripts/in_container/_in_container_script_init.sh
. /opt/airflow/scripts/in_container/configure_environment.sh
. /opt/airflow/scripts/in_container/run_init_script.sh
exec /bin/bash "${@}"
EOF
# The content below is automatically copied from scripts/docker/patch_cassandra_type_code.py
COPY <<"EOF" /patch_cassandra_type_code.py
#!/usr/bin/env python
from __future__ import annotations
import cassandra.type_codes as cassandra_type_codes
if __name__ == "__main__":
print()
path_to_patch = cassandra_type_codes.__file__
with open(path_to_patch, "r+") as f:
content = f.read()
if "PYTEST_DONT_REWRITE" in content:
print(f"The {path_to_patch} is already patched with PYTEST_DONT_REWRITE")
print()
exit(0)
f.seek(0)
content = content.replace('"""', '"""\nPYTEST_DONT_REWRITE', 1)
f.write(content)
f.truncate()
print(f"Patched {path_to_patch} with PYTEST_DONT_REWRITE")
print()
exit(0)
EOF
FROM ${PYTHON_BASE_IMAGE} as main
# The nolog bash flag is currently ignored - but you can replace it with other flags (for example
# xtrace - to show executed commands), as in the commented-out variant below.
SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"]
ARG PYTHON_BASE_IMAGE
ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow"
# By increasing this number we can force rebuilding all dependencies
ARG DEPENDENCIES_EPOCH_NUMBER="8"
# Make sure noninteractive debian install is used and language variables set
ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \
DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \
LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \
DEPENDENCIES_EPOCH_NUMBER=${DEPENDENCIES_EPOCH_NUMBER} \
INSTALL_MYSQL_CLIENT="true" \
INSTALL_MSSQL_CLIENT="true" \
INSTALL_POSTGRES_CLIENT="true"
RUN echo "Base image version: ${PYTHON_BASE_IMAGE}"
ARG DEV_APT_COMMAND=""
ARG ADDITIONAL_DEV_APT_COMMAND=""
ARG ADDITIONAL_DEV_ENV_VARS=""
ARG ADDITIONAL_DEV_APT_DEPS="bash-completion dumb-init git graphviz gosu krb5-user \
less libenchant-2-2 libgcc-10-dev libgeos-dev libpq-dev net-tools netcat \
openssh-server postgresql-client software-properties-common rsync tmux unzip vim xxd"
ARG ADDITIONAL_DEV_APT_ENV=""
ENV DEV_APT_COMMAND=${DEV_APT_COMMAND} \
ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS} \
ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND}
COPY --from=scripts install_os_dependencies.sh /scripts/docker/
RUN bash /scripts/docker/install_os_dependencies.sh dev
# Only copy the mysql/mssql/postgres installation scripts for now - so that changing the other
# scripts which are needed much later will not invalidate the docker layer here.
COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scripts/docker/
ARG HOME=/root
ARG AIRFLOW_HOME=/root/airflow
ARG AIRFLOW_SOURCES=/opt/airflow
ENV HOME=${HOME} \
AIRFLOW_HOME=${AIRFLOW_HOME} \
AIRFLOW_SOURCES=${AIRFLOW_SOURCES}
# We run the scripts with bash here to make sure we can execute them. Changing to +x might have an
# unexpected result - the cache for Dockerfiles might get invalidated in case the host system
# had a different umask set and the group x bit was not set. In Azure the bit might not be set at all.
# That also protects against the AUFS Docker backend problem where changing the executable bit requires a sync.
RUN bash /scripts/docker/install_mysql.sh prod \
&& bash /scripts/docker/install_mysql.sh dev \
&& bash /scripts/docker/install_mssql.sh \
&& bash /scripts/docker/install_postgres.sh dev \
&& adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \
--quiet "airflow" --home "/home/airflow" \
&& echo -e "airflow\nairflow" | passwd airflow 2>&1 \
&& echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
&& chmod 0440 /etc/sudoers.d/airflow
# Install Helm
ARG HELM_VERSION="v3.9.4"
RUN SYSTEM=$(uname -s | tr '[:upper:]' '[:lower:]') \
&& PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" ) \
&& HELM_URL="https://get.helm.sh/helm-${HELM_VERSION}-${SYSTEM}-${PLATFORM}.tar.gz" \
&& curl --silent --location "${HELM_URL}" | tar -xz -O "${SYSTEM}-${PLATFORM}/helm" > /usr/local/bin/helm \
&& chmod +x /usr/local/bin/helm
WORKDIR ${AIRFLOW_SOURCES}
RUN mkdir -pv ${AIRFLOW_HOME} && \
mkdir -pv ${AIRFLOW_HOME}/dags && \
mkdir -pv ${AIRFLOW_HOME}/logs
ARG AIRFLOW_REPO=apache/airflow
ARG AIRFLOW_BRANCH=main
# Airflow Extras installed
ARG AIRFLOW_EXTRAS="all"
ARG ADDITIONAL_AIRFLOW_EXTRAS=""
# Allows overriding the constraints source
ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow"
ARG AIRFLOW_CONSTRAINTS_MODE="constraints-source-providers"
ARG AIRFLOW_CONSTRAINTS_REFERENCE=""
ARG AIRFLOW_CONSTRAINTS_LOCATION=""
ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main"
# By changing the epoch we can force reinstalling Airflow and all pip dependencies.
# It can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable.
ARG AIRFLOW_CI_BUILD_EPOCH="4"
ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true"
# By default in the image, we are installing all providers when installing from sources
ARG INSTALL_PROVIDERS_FROM_SOURCES="true"
ARG AIRFLOW_PIP_VERSION=23.1.2
# Setup PIP
# By default, PIP installs run without a cache to make the image smaller
ARG PIP_NO_CACHE_DIR="true"
# By default PIP has a progress bar, but you can disable it.
ARG PIP_PROGRESS_BAR="on"
# Optimizing installation of the Cassandra driver (in case there are no prebuilt wheels, which is
# the case as of 20.04.2021 with Python 3.9).
# Speeds up building the image - building the cassandra driver without CYTHON saves around 10 minutes
ARG CASS_DRIVER_NO_CYTHON="1"
# Build cassandra driver on multiple CPUs
ARG CASS_DRIVER_BUILD_CONCURRENCY="8"
ARG AIRFLOW_VERSION="2.5.0.dev0"
# Additional PIP flags passed to all pip install commands except reinstalling pip itself
ARG ADDITIONAL_PIP_INSTALL_FLAGS=""
ENV AIRFLOW_REPO=${AIRFLOW_REPO} \
AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \
CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \
AIRFLOW_CONSTRAINTS_MODE=${AIRFLOW_CONSTRAINTS_MODE} \
AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \
AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \
DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \
AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \
AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \
INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \
AIRFLOW_VERSION=${AIRFLOW_VERSION} \
AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
# In the CI image we always:
# * install MySQL, MsSQL
# * install airflow from current sources, not from PyPI package
# * install airflow without `--user` flag
# * install airflow in editable mode
# * always install the current version of airflow
INSTALL_MYSQL_CLIENT="true" \
INSTALL_MSSQL_CLIENT="true" \
INSTALL_POSTGRES_CLIENT="true" \
AIRFLOW_INSTALLATION_METHOD="." \
AIRFLOW_INSTALL_EDITABLE_FLAG="--editable" \
AIRFLOW_VERSION_SPECIFICATION="" \
PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} \
PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \
ADDITIONAL_PIP_INSTALL_FLAGS=${ADDITIONAL_PIP_INSTALL_FLAGS} \
CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY} \
CASS_DRIVER_NO_CYTHON=${CASS_DRIVER_NO_CYTHON}
RUN echo "Airflow version: ${AIRFLOW_VERSION}"
# Those are additional constraints that are needed for some extras but we do not want to
# force them on the main Airflow package. Currently we need no extra limits as PIP 23.1+ has much better
# dependency resolution and we do not need to limit the versions of the dependencies
# !!! MAKE SURE YOU SYNCHRONIZE THE LIST BETWEEN: Dockerfile, Dockerfile.ci
ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=""
ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
ENV EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} \
UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
# Copy all scripts required for installation - changing any of those should lead to
# rebuilding from here
COPY --from=scripts install_pip_version.sh install_airflow_dependencies_from_branch_tip.sh \
common.sh /scripts/docker/
# We are first creating a venv where all python packages (and the .so binaries they need)
# are installed.
# In case of CI builds we want to pre-install the main version of airflow dependencies so that
# we do not have to always reinstall them from scratch.
# The pre-installed dependencies are automatically reinstalled from scratch every time a patch
# release of python is released.
# Airflow itself (and the providers, in case INSTALL_PROVIDERS_FROM_SOURCES is "false")
# is uninstalled - only the dependencies remain.
# The cache is only used when "upgrade to newer dependencies" is not set, to automatically
# account for removed dependencies (we do not install them in the first place).
RUN bash /scripts/docker/install_pip_version.sh; \
if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \
${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \
bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \
fi
# The PATH is needed for PIPX to find the tools installed
ENV PATH="/root/.local/bin:${PATH}"
COPY --from=scripts install_pipx_tools.sh /scripts/docker/
# Install useful command line tools in their own virtualenv so that they do not clash with
# dependencies installed in Airflow
RUN bash /scripts/docker/install_pipx_tools.sh
# Airflow sources change frequently but dependency configuration won't change that often
# We copy setup.py and other files needed to perform setup of dependencies
# So in case setup.py changes we can install latest dependencies required.
COPY setup.py ${AIRFLOW_SOURCES}/setup.py
COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg
COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/
COPY generated/provider_dependencies.json ${AIRFLOW_SOURCES}/generated/
COPY constraints/* ${AIRFLOW_SOURCES}/constraints/
COPY --from=scripts install_airflow.sh /scripts/docker/
# The goal of this line is to install the dependencies from the most current setup.py from sources
# This will usually be a small incremental set of packages in a CI-optimized build, so it will be very fast
# In a non-CI-optimized build this will install all dependencies before installing sources.
# Usually we will install versions based on the dependencies in setup.py and upgrade only if needed.
# But in the cron job we will install the latest versions matching setup.py to see if there is no breaking change
# and push the constraints if everything is successful
RUN bash /scripts/docker/install_airflow.sh
COPY --from=scripts entrypoint_ci.sh /entrypoint
COPY --from=scripts entrypoint_exec.sh /entrypoint-exec
RUN chmod a+x /entrypoint /entrypoint-exec
COPY --from=scripts install_pip_version.sh install_additional_dependencies.sh /scripts/docker/
# Additional python deps to install
ARG ADDITIONAL_PYTHON_DEPS=""
RUN bash /scripts/docker/install_pip_version.sh; \
if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
bash /scripts/docker/install_additional_dependencies.sh; \
fi
COPY --from=scripts patch_cassandra_type_code.py /patch_cassandra_type_code.py
# Patch cassandra type_code to avoid accidental type_code assert rewriting breaking pytest
# test discovery and execution.
# This one can be fixed once https://github.com/pytest-dev/pytest/issues/10844
# is fixed and released or once the workaround is merged in cassandra-driver
# https://github.com/datastax/python-driver/pull/1142
RUN python /patch_cassandra_type_code.py
# Install autocomplete for airflow
RUN if command -v airflow; then \
register-python-argcomplete airflow >> ~/.bashrc ; \
fi
# Install autocomplete for Kubectl
RUN echo "source /etc/bash_completion" >> ~/.bashrc
# We can copy everything here. The Context is filtered by dockerignore. This makes sure we are not
# copying over stuff that is accidentally generated or that we do not need (such as egg-info).
# If you want to add something that is missing and you expect to see it in the image, you can
# add it with ! in .dockerignore next to the airflow, test, etc. directories there
COPY . ${AIRFLOW_SOURCES}/
WORKDIR ${AIRFLOW_SOURCES}
ARG BUILD_ID
ARG COMMIT_SHA
ARG AIRFLOW_IMAGE_DATE_CREATED
ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${PATH}" \
GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm/" \
BUILD_ID=${BUILD_ID} \
COMMIT_SHA=${COMMIT_SHA}
# Link dumb-init for backwards compatibility (so that older images also work)
RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init
EXPOSE 8080
LABEL org.apache.airflow.distro="debian" \
org.apache.airflow.module="airflow" \
org.apache.airflow.component="airflow" \
org.apache.airflow.image="airflow-ci" \
org.apache.airflow.version="${AIRFLOW_VERSION}" \
org.apache.airflow.uid="0" \
org.apache.airflow.gid="0" \
org.apache.airflow.build-id="${BUILD_ID}" \
org.apache.airflow.commit-sha="${COMMIT_SHA}" \
org.opencontainers.image.source="${AIRFLOW_IMAGE_REPOSITORY}" \
org.opencontainers.image.created="${AIRFLOW_IMAGE_DATE_CREATED}" \
org.opencontainers.image.authors="dev@airflow.apache.org" \
org.opencontainers.image.url="https://airflow.apache.org" \
org.opencontainers.image.documentation="https://github.com/apache/airflow/IMAGES.rst" \
org.opencontainers.image.source="https://github.com/apache/airflow" \
org.opencontainers.image.version="${AIRFLOW_VERSION}" \
org.opencontainers.image.revision="${COMMIT_SHA}" \
org.opencontainers.image.vendor="Apache Software Foundation" \
org.opencontainers.image.licenses="Apache-2.0" \
org.opencontainers.image.ref.name="airflow-ci-image" \
org.opencontainers.image.title="Continuous Integration Airflow Image" \
org.opencontainers.image.description="Installed Apache Airflow with Continuous Integration dependencies"
ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
CMD []