;
; Licensed to the Apache Software Foundation (ASF) under one or more
; contributor license agreements. See the NOTICE file distributed with
; this work for additional information regarding copyright ownership.
; The ASF licenses this file to You under the Apache License, Version 2.0
; (the "License"); you may not use this file except in compliance with
; the License. You may obtain a copy of the License at
;
; http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
[tox]
# new environments will be excluded by default unless explicitly added to envlist.
envlist = py39,py310,py311,py312,py313,py39-{cloud,cloudcoverage,dask},py310-{cloud,dask},py311-{cloud,dask},py312-{cloud,dask},py313-{cloud,dask},docs,lint,mypy,whitespacelint
toxworkdir = {toxinidir}/target/{env:ENV_NAME:.tox}
[pycodestyle]
# Disable all errors and warnings except for the ones related to blank lines.
# pylint does not check the number of blank lines.
select = E3
# Shared environment options.
[testenv]
# Run the tests using pre-released dependencies.
# https://github.com/apache/beam/issues/25668
pip_pre = True
# allow apps that support color to use it.
passenv=TERM,CLOUDSDK_CONFIG,DOCKER_*,TESTCONTAINERS_*,TC_*,ALLOYDB_PASSWORD
# Set [] options for pip installation of apache-beam tarball.
extras = test,dataframe
# Don't warn that these commands aren't installed.
allowlist_externals =
false
time
bash
rm
/bin/sh
curl
./codecov
chmod
setenv =
RUN_SKIPPED_PY3_TESTS=0
# Use an isolated tmp dir for tests that get slowed down by scanning /tmp.
TMPDIR={envtmpdir}
# Silence warning about ignoring PYTHONPATH.
PYTHONPATH=
# These 2 magic command overrides are required for Jenkins builds.
# Otherwise we get "OSError: [Errno 2] No such file or directory" errors.
# Source:
# https://github.com/tox-dev/tox/issues/123#issuecomment-284714629
install_command = {envbindir}/python {envbindir}/pip install --retries 10 {opts} {packages}
list_dependencies_command = {envbindir}/python {envbindir}/pip freeze
commands_pre =
python --version
pip --version
pip check
bash {toxinidir}/scripts/run_tox_cleanup.sh
commands_post =
bash {toxinidir}/scripts/run_tox_cleanup.sh
commands = false {envname} is misconfigured
[testenv:py{39,310,311,312,313}]
commands_pre =
python --version
pip --version
pip check
bash {toxinidir}/scripts/run_tox_cleanup.sh
deps =
numpy==1.26.4
commands =
python apache_beam/examples/complete/autocomplete_test.py
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py{39,310,311,312,313}-macos]
commands_pre =
python --version
pip --version
# pip check
bash {toxinidir}/scripts/run_tox_cleanup.sh
commands =
python apache_beam/examples/complete/autocomplete_test.py
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py{39,310,311,312,313}-win]
commands =
python apache_beam/examples/complete/autocomplete_test.py
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
[testenv:py{39,310,311,312,313}-cloud]
extras = test,gcp,interactive,dataframe,aws,azure
commands =
python apache_beam/examples/complete/autocomplete_test.py
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py{39,310,311}-ml]
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
deps =
pip==25.0.1
accelerate>=1.6.0
onnx<1.19.0
setenv =
extras = test,gcp,dataframe,ml_test
commands =
# Log tensorflow version for debugging
/bin/sh -c "pip freeze | grep -E tensorflow"
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py312-ml]
# many packages do not support py3.12
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
deps =
accelerate>=1.6.0
setenv =
extras = test,gcp,dataframe,p312_ml_test
commands =
# Log tensorflow version for debugging
/bin/sh -c "pip freeze | grep -E tensorflow"
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
[testenv:py{39,310,311,312,313}-dask]
extras = test,dask,dataframe
commands_pre =
pip install 'distributed>=2024.4.2' 'dask>=2024.4.2'
commands =
bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/
[testenv:py{39,310,311,312,313}-win-dask]
# use the tight range since the latest dask requires cloudpickle 3.0
commands_pre =
pip install 'distributed>=2024.4.2,<2024.9.0' 'dask>=2024.4.2,<2024.9.0'
commands =
python apache_beam/examples/complete/autocomplete_test.py
bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/
install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
[testenv:py39-cloudcoverage]
deps =
pytest-cov==3.0.0
platform = linux
passenv = GIT_*,BUILD_*,ghprb*,CHANGE_ID,BRANCH_NAME,JENKINS_*,CODECOV_*,GITHUB_*,DOCKER_*,TESTCONTAINERS_*,TC_*
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
PYTHONPATH = {toxinidir}
DOCKER_HOST = {env:DOCKER_HOST}
TC_TIMEOUT = {env:TC_TIMEOUT:120}
TC_MAX_TRIES = {env:TC_MAX_TRIES:120}
TC_SLEEP_TIME = {env:TC_SLEEP_TIME:1}
# NOTE: we could add ml_test to increase the collected code coverage metrics, but it would make the suite slower.
extras = test,gcp,interactive,dataframe,aws
commands =
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append"
[testenv:lint]
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in pylint.
setenv =
# keep the version of pylint in sync with the 'rev' in .pre-commit-config.yaml
deps =
astroid<2.17.0,>=2.15.6
pycodestyle==2.8.0
pylint==2.17.5
isort==4.2.15
flake8==4.0.1
commands =
pylint --version
time {toxinidir}/scripts/run_pylint.sh
[testenv:whitespacelint]
setenv =
deps =
whitespacelint==1.1.0
commands =
time {toxinidir}/scripts/run_whitespacelint.sh
[testenv:mypy]
deps =
mypy==1.13.0
dask==2022.01.0
distributed==2022.01.0
# make extras available in case any of these libs are typed
extras =
gcp
commands =
mypy --version
python setup.py mypy
[testenv:docs]
extras = test,gcp,docs,interactive,dataframe,dask
deps =
Sphinx==7.4.7
sphinx_rtd_theme==3.0.1
docutils>=0.18.1
Jinja2==3.1.0
commands =
time {toxinidir}/scripts/generate_pydoc.sh
[testenv:hdfs_integration_test]
# Used by hdfs_integration_test.sh. Do not run this directly, as it depends on
# nodes defined in hdfs_integration_test/docker-compose.yml.
deps =
holdup==1.8.0
extras =
gcp
allowlist_externals =
bash
echo
sleep
wget
hdfscli
passenv = HDFSCLI_CONFIG
commands =
holdup -t 45 http://namenode:50070 http://datanode:50075
echo "Waiting for safe mode to end."
sleep 45
wget storage.googleapis.com/dataflow-samples/shakespeare/kinglear.txt
hdfscli -v -v -v upload -f kinglear.txt /
python -m apache_beam.examples.wordcount \
--input hdfs://kinglear* \
--output hdfs://py-wordcount-integration \
--hdfs_host namenode --hdfs_port 50070 --hdfs_user root
python -m apache_beam.examples.wordcount \
--input hdfs://unused_server/kinglear* \
--output hdfs://unused_server/py-wordcount-integration \
--hdfs_host namenode --hdfs_port 50070 --hdfs_user root --hdfs_full_urls
commands_pre =
pip check
[testenv:azure_integration_test]
# Used by azure/integration_test/azure_integration_test.sh.
# Do not run this directly, as it depends on nodes defined in
# azure/integration_test/docker-compose.yml.
deps =
extras =
azure
passenv = REQUESTS_CA_BUNDLE
allowlist_externals =
wget
az
bash
setenv =
CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=https://azurite:10000/devstoreaccount1;
commands_pre =
pip check
wget storage.googleapis.com/dataflow-samples/shakespeare/kinglear.txt
# Create container for storing files.
az storage container create -n container --connection-string {env:CONNECTION_STRING}
# Upload test file.
az storage blob upload -f kinglear.txt -c container -n kinglear.txt --connection-string {env:CONNECTION_STRING}
commands =
# Test --azure_connection_string
python -m apache_beam.examples.wordcount \
--input azfs://devstoreaccount1/container/* \
--output azfs://devstoreaccount1/container/py-wordcount-integration \
--azure_connection_string {env:CONNECTION_STRING}
# This doesn't work because there's no way to send a fake bearer token to
# Azurite when using DefaultAzureCredential.
# See https://github.com/Azure/Azurite/issues/389#issuecomment-615298432
# and https://github.com/Azure/Azurite/issues/1750#issue-1449778593
#python -m apache_beam.examples.wordcount \
# --input azfs://devstoreaccount1/container/* \
# --output azfs://devstoreaccount1/container/py-wordcount-integration \
# --blob_service_endpoint https://azurite:10000/devstoreaccount1/container-name \
# --azure_managed_identity_client_id "abc123"
[testenv:py3-yapf]
# keep the version of yapf in sync with the 'rev' in .pre-commit-config.yaml and pyproject.toml
deps =
yapf==0.43.0
commands =
yapf --version
time yapf --in-place --parallel --recursive apache_beam
[testenv:py3-yapf-check]
# keep the version of yapf in sync with the 'rev' in .pre-commit-config.yaml and pyproject.toml
deps =
yapf==0.43.0
commands =
yapf --version
time yapf --diff --parallel --recursive apache_beam
[testenv:jest]
setenv =
deps =
jupyterlab==4.3.6
commands =
time {toxinidir}/scripts/setup_nodejs.sh
time {toxinidir}/scripts/run_jest.sh
[testenv:eslint]
setenv =
deps =
jupyterlab==4.3.6
commands =
time {toxinidir}/scripts/setup_nodejs.sh
time {toxinidir}/scripts/run_eslint.sh
[testenv:flink-runner-test]
extras = test
commands =
bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/flink_runner_test.py {posargs}
[testenv:samza-runner-test]
passenv = JAVA_HOME
extras = test
commands =
bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/samza_runner_test.py {posargs}
[testenv:spark-runner-test]
extras = test
commands =
bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/spark_runner_test.py {posargs}
[testenv:prism-runner-test]
extras = test
commands =
bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/prism_runner_test.py {posargs}
[testenv:py{39,310}-pyarrow-{3,9,10,11,12,13,14,15,16,17,18}]
deps =
# As a courtesy to users, test against the oldest allowed version of Pyarrow.
# We'd have to increase the pyarrow lower bound when Python 3.9 is deprecated.
# Since Pandas 2 requires pyarrow>=7, downgrade pandas for this test.
3: pyarrow>=3,<4
3: pandas<2
3: numpy>=1.14.3,<1.27.0
# Test against versions of pyarrow released in last ~2 years.
9: pyarrow>=9,<10
9: pandas==2.1.4
10: pyarrow>=10,<11
11: pyarrow>=11,<12
12: pyarrow>=12,<13
13: pyarrow>=13,<14
14: pyarrow>=14,<15
15: pyarrow>=15,<16
16: pyarrow>=16,<17
17: pyarrow>=17,<18
18: pyarrow>=18,<19
numpy==1.26.4
commands =
# Log pyarrow and numpy version for debugging
/bin/sh -c "pip freeze | grep -E '(pyarrow|numpy)'"
# Run pytest directly rather using run_pytest.sh. It doesn't handle
# selecting tests with -m (BEAM-12985).
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pyarrow {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
[testenv:py{39,310}-pandas-{14,15,20}]
deps =
14: pandas>=1.4.3,<1.5.0
14: numpy>=1.14.3,<1.27.0
# Exclude 1.5.0 and 1.5.1 because of https://github.com/pandas-dev/pandas/issues/45725
15: pandas>=1.5.2,<1.6.0
15: numpy>=1.14.3,<1.27.0
20: pandas>=2.0.0,<2.1.0
20: pyarrow>=7
20: numpy>=1.14.3,<1.27.0
commands =
# Log pandas and numpy version for debugging
/bin/sh -c "pip freeze | grep -E '(pandas|numpy)'"
# Run all DataFrame API unit tests
bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/dataframe'
[testenv:py{39,310}-tft-{113,114}]
deps =
# Help pip resolve conflict with typing-extensions due to an old version of tensorflow https://github.com/apache/beam/issues/30852
# NOTE(review): the 113 factor pins no tensorflow_transform version, unlike the
# 114 factor below — confirm whether a `113: tensorflow_transform>=1.13.0,<1.14.0`
# pin was dropped intentionally; without it the 113 env relies on whatever the
# default extras pull in.
113: pydantic<2.0
114: tensorflow_transform>=1.14.0,<1.15.0
commands =
bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms apache_beam/examples/snippets/transforms/elementwise/mltransform_test.py'
[testenv:py{39,310}-pytorch-{19,110,111,112,113}]
deps =
19: torch>=1.9.0,<1.10.0
110: torch>=1.10.0,<1.11.0
111: torch>=1.11.0,<1.12.0
112: torch>=1.12.0,<1.13.0
113: torch>=1.13.0,<1.14.0
numpy==1.26.4
extras = test,gcp
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
commands =
# Log torch version for debugging
/bin/sh -c "pip freeze | grep -E torch"
# Run all PyTorch<2 unit tests
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
[testenv:py{39,310}-pytorch-200]
deps =
200:
torch>=2.0.0,<2.1.0
mpmath==1.3.0
numpy==1.26.4
extras = test,gcp
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
commands =
# Log torch version for debugging
/bin/sh -c "pip freeze | grep -E torch"
# Run all PyTorch>=2 unit tests
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
[testenv:py{311,312}-pytorch-271]
deps =
271:
torch>=2.7.1,<2.8.0
mpmath==1.3.0
numpy==1.26.4
extras = test,gcp
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
commands =
# Log torch version for debugging
/bin/sh -c "pip freeze | grep -E torch"
# Run all PyTorch>=2 unit tests
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
# TODO(https://github.com/apache/beam/issues/25796) - uncomment onnx tox task in tox/py39/build.gradle once onnx supports protobuf 4.x.x
[testenv:py{39,310}-onnx-113]
# TODO(https://github.com/apache/beam/issues/25443)
# apparently tox has problem when substitution key has single value. Change back to -onnx-{113,...}
# when multiple onnx versions are tested.
deps =
onnxruntime==1.13.1
pandas==1.5.2
torch==1.13.1
tensorflow==2.11.0
tf2onnx==1.13.0
skl2onnx==1.13
transformers==4.25.1
extras = test,gcp
commands =
# Log onnx version for debugging
/bin/sh -c "pip freeze | grep -E onnx"
# Run all ONNX unit tests
pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs}
[testenv:py39-tensorflow-212]
deps =
212:
tensorflow>=2.12rc1,<2.13
# Help pip resolve conflict with typing-extensions for old version of TF https://github.com/apache/beam/issues/30852
pydantic<2.7
extras = test,gcp
commands_pre =
pip install -U 'protobuf==4.25.5'
commands =
# Log tensorflow version for debugging
/bin/sh -c "pip freeze | grep -E tensorflow"
# Run all Tensorflow unit tests
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
[testenv:py312-tensorflow-216]
deps =
216:
tensorflow>=2.16.1,<2.17
# Help pip resolve conflict with typing-extensions for old version of TF https://github.com/apache/beam/issues/30852
pydantic<2.7
extras = test,gcp
commands_pre =
pip install -U 'protobuf==4.25.5'
commands =
# Log tensorflow version for debugging
/bin/sh -c "pip freeze | grep -E tensorflow"
# Run all Tensorflow unit tests
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
[testenv:py39-xgboost-{160,170}]
deps =
160:
xgboost>=1.6.0,<1.7.0
datatable==1.0.0
170:
xgboost>=1.7.0
datatable==1.0.0
extras = test,gcp
commands =
# Log XGBoost version for debugging
/bin/sh -c "pip freeze | grep -E xgboost"
# Run all XGBoost unit tests
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_xgboost {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
[testenv:py{39,310}-transformers-{428,447,455,latest}]
deps =
# sentence-transformers 2.2.2 is the latest version that supports transformers 4.28.x
428: sentence-transformers==2.2.2
428: transformers>=4.28.0,<4.29.0
428: torch>=1.9.0,<1.14.0
447: transformers>=4.47.0,<4.48.0
447: torch>=1.9.0,<1.14.0
455: transformers>=4.55.0,<4.56.0
455: torch>=2.0.0,<2.1.0
latest: transformers>=4.55.0
latest: torch>=2.0.0
latest: accelerate>=1.6.0
tensorflow==2.12.0
protobuf==4.25.5
pip==25.0.1
extras = test,gcp,ml_test
commands =
# Log transformers and its dependencies version for debugging
/bin/sh -c "pip freeze | grep -E transformers"
/bin/sh -c "pip freeze | grep -E torch"
/bin/sh -c "pip freeze | grep -E tensorflow"
# Run all Transformers unit tests
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_transformers {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
[testenv:py{39,312}-vertex-ai]
deps =
tensorflow==2.12.0
extras = test,gcp
commands =
# Log aiplatform and its dependencies version for debugging
/bin/sh -c "pip freeze | grep -E google-cloud-aiplatform"
/bin/sh -c "pip freeze | grep -E tensorflow"
# Run all Vertex AI unit tests
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_vertex_ai {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
[testenv:py{39,310}-embeddings]
deps =
sentence-transformers==3.3.1
accelerate>=1.6.0
passenv = HF_INFERENCE_TOKEN
extras = test,gcp
commands =
# Log aiplatform and its dependencies version for debugging
/bin/sh -c "pip freeze | grep -E sentence-transformers"
/bin/sh -c "pip freeze | grep -E google-cloud-aiplatform"
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest apache_beam/ml/transforms/embeddings -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
[testenv:py{39,310}-TFHubEmbeddings-{014,015}]
deps =
014: tensorflow-hub>=0.14.0,<0.15.0
# Help pip resolve conflict with typing-extensions due to an old version of tensorboard https://github.com/apache/beam/issues/30852
014: pydantic<2.7
015: tensorflow-hub>=0.15.0,<0.16.0
# Help pip resolve conflict with typing-extensions due to an old version of tensorboard https://github.com/apache/beam/issues/30852
015: pydantic<2.7
tensorflow-text # required to register ops for text embedding models.
extras = test,gcp
commands =
# Log aiplatform and its dependencies version for debugging
/bin/sh -c "pip freeze | grep -E tensorflow"
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms/embeddings'
[testenv:py{310,312}-dill]
extras = test,dill
commands =
# Log dill version for debugging
/bin/sh -c "pip freeze | grep -E dill"
# Run all dill-specific tests
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 1 -m uses_dill {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'