;
;    Licensed to the Apache Software Foundation (ASF) under one or more
;    contributor license agreements.  See the NOTICE file distributed with
;    this work for additional information regarding copyright ownership.
;    The ASF licenses this file to You under the Apache License, Version 2.0
;    (the "License"); you may not use this file except in compliance with
;    the License.  You may obtain a copy of the License at
;
;       http://www.apache.org/licenses/LICENSE-2.0
;
;    Unless required by applicable law or agreed to in writing, software
;    distributed under the License is distributed on an "AS IS" BASIS,
;    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;    See the License for the specific language governing permissions and
;    limitations under the License.
;

[tox]
# new environments will be excluded by default unless explicitly added to envlist.
envlist = py39,py310,py311,py312,py313,py39-{cloud,cloudcoverage,dask},py310-{cloud,dask},py311-{cloud,dask},py312-{cloud,dask},py313-{cloud,dask},docs,lint,mypy,whitespacelint
toxworkdir = {toxinidir}/target/{env:ENV_NAME:.tox}

[pycodestyle]
# Disable all errors and warnings except for the ones related to blank lines.
# pylint does not check the number of blank lines.
select = E3

# Shared environment options.
[testenv]
# Run the tests using pre-released dependencies.
# https://github.com/apache/beam/issues/25668
pip_pre = True
# allow apps that support color to use it.
passenv=TERM,CLOUDSDK_CONFIG
# Set [] options for pip installation of apache-beam tarball.
extras = test,dataframe
# Don't warn that these commands aren't installed.
allowlist_externals =
  false
  time
  bash
  rm
  /bin/sh
  curl
  ./codecov
  chmod
setenv =
  RUN_SKIPPED_PY3_TESTS=0
  # Use an isolated tmp dir for tests that get slowed down by scanning /tmp.
  TMPDIR={envtmpdir}
  # Silence warning about ignoring PYTHONPATH.
  PYTHONPATH=

# These 2 magic command overrides are required for Jenkins builds.
# Otherwise we get "OSError: [Errno 2] No such file or directory" errors.
# Source:
# https://github.com/tox-dev/tox/issues/123#issuecomment-284714629
install_command = {envbindir}/python {envbindir}/pip install --retries 10 {opts} {packages}
list_dependencies_command = {envbindir}/python {envbindir}/pip freeze
commands_pre =
  python --version
  pip --version
  pip check
  bash {toxinidir}/scripts/run_tox_cleanup.sh
commands_post =
  bash {toxinidir}/scripts/run_tox_cleanup.sh

# Fallback for environments that do not define their own commands: fail loudly.
commands = false {envname} is misconfigured

[testenv:py{39,310,311,312,313}]
commands_pre =
  python --version
  pip --version
  pip check
  bash {toxinidir}/scripts/run_tox_cleanup.sh
deps =
  numpy==1.26.4
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py{39,310,311,312,313}-macos]
commands_pre =
  python --version
  pip --version
  # pip check
  bash {toxinidir}/scripts/run_tox_cleanup.sh
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py{39,310,311,312,313}-win]
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
# Same overrides as in [testenv], but pointing at the .exe launchers.
install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze

[testenv:py{39,310,311,312,313}-cloud]
; extras = test,gcp,interactive,dataframe,aws,azure
extras = test,gcp,interactive,dataframe,aws,azure
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py{39,310,311}-ml]
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
deps =
  pip==25.0.1
  accelerate>=1.6.0
setenv =
extras = test,gcp,dataframe,ml_test
commands =
  # Log tensorflow version for debugging
  /bin/sh -c "pip freeze | grep -E tensorflow"
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py312-ml]
# many packages do not support py3.12
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
deps =
  accelerate>=1.6.0
setenv =
extras = test,gcp,dataframe,p312_ml_test
commands =
  # Log tensorflow version for debugging
  /bin/sh -c "pip freeze | grep -E tensorflow"
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

# Keep the Python factors in sync with the py3x-dask entries in envlist.
[testenv:py{39,310,311,312,313}-dask]
# 'dataframe' is the extra name used by every other environment in this file.
extras = test,dask,dataframe
commands_pre =
  pip install 'distributed>=2024.4.2' 'dask>=2024.4.2'
commands =
  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/

[testenv:py{39,310,311,312,313}-win-dask]
# use the tight range since the latest dask requires cloudpickle 3.0
commands_pre =
  pip install 'distributed>=2024.4.2,<2024.9.0' 'dask>=2024.4.2,<2024.9.0'
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/
install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze

[testenv:py39-cloudcoverage]
deps =
  pytest-cov==3.0.0
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
  PYTHONPATH = {toxinidir}
platform = linux
passenv = GIT_*,BUILD_*,ghprb*,CHANGE_ID,BRANCH_NAME,JENKINS_*,CODECOV_*,GITHUB_*
# NOTE: we could add ml_test to increase the collected code coverage metrics, but it would make the suite slower.
extras = test,gcp,interactive,dataframe,aws
commands =
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append"

[testenv:lint]
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in pylint.
setenv =
# keep the version of pylint in sync with the 'rev' in .pre-commit-config.yaml
deps =
  astroid<2.17.0,>=2.15.6
  pycodestyle==2.8.0
  pylint==2.17.5
  isort==4.2.15
  flake8==4.0.1
commands =
  pylint --version
  time {toxinidir}/scripts/run_pylint.sh

[testenv:whitespacelint]
setenv =
deps =
  whitespacelint==1.1.0
commands =
  time {toxinidir}/scripts/run_whitespacelint.sh

[testenv:mypy]
deps =
  mypy==1.13.0
  dask==2022.01.0
  distributed==2022.01.0
# make extras available in case any of these libs are typed
extras =
  gcp
commands =
  mypy --version
  python setup.py mypy

[testenv:docs]
extras = test,gcp,docs,interactive,dataframe,dask
deps =
  Sphinx==7.4.7
  sphinx_rtd_theme==3.0.1
  docutils>=0.18.1
  Jinja2==3.1.0
commands =
  time {toxinidir}/scripts/generate_pydoc.sh

[testenv:hdfs_integration_test]
# Used by hdfs_integration_test.sh. Do not run this directly, as it depends on
# nodes defined in hdfs_integration_test/docker-compose.yml.
deps =
  holdup==1.8.0
extras =
  gcp
allowlist_externals =
  bash
  echo
  sleep
  wget
  hdfscli
passenv = HDFSCLI_CONFIG
commands =
  holdup -t 45 http://namenode:50070 http://datanode:50075
  echo "Waiting for safe mode to end."
  sleep 45
  wget storage.googleapis.com/dataflow-samples/shakespeare/kinglear.txt
  hdfscli -v -v -v upload -f kinglear.txt /
  python -m apache_beam.examples.wordcount \
      --input hdfs://kinglear* \
      --output hdfs://py-wordcount-integration \
      --hdfs_host namenode --hdfs_port 50070 --hdfs_user root
  python -m apache_beam.examples.wordcount \
      --input hdfs://unused_server/kinglear* \
      --output hdfs://unused_server/py-wordcount-integration \
      --hdfs_host namenode --hdfs_port 50070 --hdfs_user root --hdfs_full_urls
commands_pre =
  pip check

[testenv:azure_integration_test]
# Used by azure/integration_test/azure_integration_test.sh.
# Do not run this directly, as it depends on nodes defined in
# azure/integration_test/docker-compose.yml.
deps =
extras =
  azure
passenv = REQUESTS_CA_BUNDLE
allowlist_externals =
  wget
  az
  bash
setenv =
  CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=https://azurite:10000/devstoreaccount1;
commands_pre =
  pip check
  wget storage.googleapis.com/dataflow-samples/shakespeare/kinglear.txt
  # Create container for storing files.
  az storage container create -n container --connection-string {env:CONNECTION_STRING}
  # Upload test file.
  az storage blob upload -f kinglear.txt -c container -n kinglear.txt --connection-string {env:CONNECTION_STRING}
commands =
  # Test --azure_connection_string
  python -m apache_beam.examples.wordcount \
      --input azfs://devstoreaccount1/container/* \
      --output azfs://devstoreaccount1/container/py-wordcount-integration \
      --azure_connection_string {env:CONNECTION_STRING}
  # This doesn't work because there's no way to send a fake bearer token to
  # Azurite when using DefaultAzureCredential.
  # See https://github.com/Azure/Azurite/issues/389#issuecomment-615298432
  # and https://github.com/Azure/Azurite/issues/1750#issue-1449778593
  #python -m apache_beam.examples.wordcount \
  #    --input azfs://devstoreaccount1/container/* \
  #    --output azfs://devstoreaccount1/container/py-wordcount-integration \
  #    --blob_service_endpoint https://azurite:10000/devstoreaccount1/container-name \
  #    --azure_managed_identity_client_id "abc123"

[testenv:py3-yapf]
# keep the version of yapf in sync with the 'rev' in .pre-commit-config.yaml and pyproject.toml
deps =
  yapf==0.43.0
commands =
  yapf --version
  time yapf --in-place --parallel --recursive apache_beam

[testenv:py3-yapf-check]
# keep the version of yapf in sync with the 'rev' in .pre-commit-config.yaml and pyproject.toml
deps =
  yapf==0.43.0
commands =
  yapf --version
  time yapf --diff --parallel --recursive apache_beam

[testenv:jest]
setenv =
deps =
  jupyterlab==4.3.6
commands =
  time {toxinidir}/scripts/setup_nodejs.sh
  time {toxinidir}/scripts/run_jest.sh

[testenv:eslint]
setenv =
deps =
  jupyterlab==4.3.6
commands =
  time {toxinidir}/scripts/setup_nodejs.sh
  time {toxinidir}/scripts/run_eslint.sh

[testenv:flink-runner-test]
extras = test
commands =
  bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/flink_runner_test.py {posargs}

[testenv:samza-runner-test]
extras = test
commands =
  bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/samza_runner_test.py {posargs}

[testenv:spark-runner-test]
extras = test
commands =
  bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/spark_runner_test.py {posargs}

[testenv:prism-runner-test]
extras = test
commands =
  bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/prism_runner_test.py {posargs}

[testenv:py{39,310}-pyarrow-{3,9,10,11,12,13,14,15,16,17,18}]
deps =
  # As a courtesy to users, test against the oldest allowed version of Pyarrow.
  # We'd have to increase the pyarrow lower bound when Python 3.9 is deprecated.
  # Since Pandas 2 requires pyarrow>=7, downgrade pandas for this test.
  3: pyarrow>=3,<4
  3: pandas<2
  3: numpy>=1.14.3,<1.27.0
  # Test against versions of pyarrow released in last ~2 years.
  9: pyarrow>=9,<10
  9: pandas==2.1.4
  10: pyarrow>=10,<11
  11: pyarrow>=11,<12
  12: pyarrow>=12,<13
  13: pyarrow>=13,<14
  14: pyarrow>=14,<15
  15: pyarrow>=15,<16
  16: pyarrow>=16,<17
  17: pyarrow>=17,<18
  18: pyarrow>=18,<19
  numpy==1.26.4
commands =
  # Log pyarrow and numpy version for debugging
  /bin/sh -c "pip freeze | grep -E '(pyarrow|numpy)'"
  # Run pytest directly rather than using run_pytest.sh. It doesn't handle
  # selecting tests with -m (BEAM-12985).
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pyarrow {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{39,310}-pandas-{14,15,20}]
deps =
  14: pandas>=1.4.3,<1.5.0
  14: numpy>=1.14.3,<1.27.0
  # Exclude 1.5.0 and 1.5.1 because of https://github.com/pandas-dev/pandas/issues/45725
  15: pandas>=1.5.2,<1.6.0
  15: numpy>=1.14.3,<1.27.0
  20: pandas>=2.0.0,<2.1.0
  20: pyarrow>=7
  20: numpy>=1.14.3,<1.27.0
commands =
  # Log pandas and numpy version for debugging
  /bin/sh -c "pip freeze | grep -E '(pandas|numpy)'"
  # Run all DataFrame API unit tests
  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/dataframe'

[testenv:py{39,310}-tft-{113,114}]
deps =
  # Help pip resolve conflict with typing-extensions due to an old version of tensorflow https://github.com/apache/beam/issues/30852
  113: pydantic<2.0
  114: tensorflow_transform>=1.14.0,<1.15.0
commands =
  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms apache_beam/examples/snippets/transforms/elementwise/mltransform_test.py'

[testenv:py{39,310}-pytorch-{19,110,111,112,113}]
deps =
  19: torch>=1.9.0,<1.10.0
  110: torch>=1.10.0,<1.11.0
  111: torch>=1.11.0,<1.12.0
  112: torch>=1.12.0,<1.13.0
  113: torch>=1.13.0,<1.14.0
  numpy==1.26.4
extras = test,gcp
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
commands =
  # Log torch version for debugging
  /bin/sh -c "pip freeze | grep -E torch"
  # Run all PyTorch<2 unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{39,310}-pytorch-200]
deps =
  200: torch>=2.0.0,<2.1.0
  mpmath==1.3.0
  numpy==1.26.4
extras = test,gcp
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
commands =
  # Log torch version for debugging
  /bin/sh -c "pip freeze | grep -E torch"
  # Run all PyTorch>=2 unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

# TODO(https://github.com/apache/beam/issues/25796) - uncomment onnx tox task in tox/py39/build.gradle once onnx supports protobuf 4.x.x
[testenv:py{39,310}-onnx-113]
# TODO(https://github.com/apache/beam/issues/25443)
# Apparently tox has a problem when a substitution key has a single value.
# Change back to -onnx-{113,...} when multiple onnx versions are tested.
deps =
  onnxruntime==1.13.1
  pandas==1.5.2
  torch==1.13.1
  tensorflow==2.11.0
  tf2onnx==1.13.0
  skl2onnx==1.13
  transformers==4.25.1
extras = test,gcp
commands =
  # Log onnx version for debugging
  /bin/sh -c "pip freeze | grep -E onnx"
  # Run all ONNX unit tests
  pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs}

[testenv:py39-tensorflow-212]
deps =
  212: tensorflow>=2.12rc1,<2.13
  # Help pip resolve conflict with typing-extensions for old version of TF https://github.com/apache/beam/issues/30852
  pydantic<2.7
extras = test,gcp
commands_pre =
  pip install -U 'protobuf==4.25.5'
commands =
  # Log tensorflow version for debugging
  /bin/sh -c "pip freeze | grep -E tensorflow"
  # Run all Tensorflow unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py39-xgboost-{160,170}]
deps =
  # One "factor: requirement" per line so each pin applies only to its own factor.
  160: xgboost>=1.6.0,<1.7.0
  160: datatable==1.0.0
  170: xgboost>=1.7.0
  170: datatable==1.0.0
extras = test,gcp
commands =
  # Log XGBoost version for debugging
  /bin/sh -c "pip freeze | grep -E xgboost"
  # Run all XGBoost unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_xgboost {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{39,310}-transformers-{428,447,448,latest}]
deps =
  # sentence-transformers 2.2.2 is the latest version that supports transformers 4.28.x
  428: sentence-transformers==2.2.2
  428: transformers>=4.28.0,<4.29.0
  428: torch>=1.9.0,<1.14.0
  447: transformers>=4.47.0,<4.48.0
  447: torch>=1.9.0,<1.14.0
  448: transformers>=4.48.0,<4.49.0
  448: torch>=2.0.0,<2.1.0
  latest: transformers>=4.48.0
  latest: torch>=2.0.0
  latest: accelerate>=1.6.0
  tensorflow==2.12.0
  protobuf==4.25.5
  pip==25.0.1
extras = test,gcp,ml_test
commands =
  # Log transformers and its dependencies version for debugging
  /bin/sh -c "pip freeze | grep -E transformers"
  /bin/sh -c "pip freeze | grep -E torch"
  /bin/sh -c "pip freeze | grep -E tensorflow"
  # Run all Transformers unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_transformers {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{39,312}-vertex-ai]
# NOTE(review): confirm that the tensorflow==2.12.0 pin below is installable
# for the py312 variant of this environment.
deps =
  tensorflow==2.12.0
extras = test,gcp
commands =
  # Log aiplatform and its dependencies version for debugging
  /bin/sh -c "pip freeze | grep -E google-cloud-aiplatform"
  /bin/sh -c "pip freeze | grep -E tensorflow"
  # Run all Vertex AI unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_vertex_ai {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{39,310}-embeddings]
deps =
  sentence-transformers==3.3.1
  accelerate>=1.6.0
passenv = HF_INFERENCE_TOKEN
extras = test,gcp
commands =
  # Log sentence-transformers and aiplatform versions for debugging
  /bin/sh -c "pip freeze | grep -E sentence-transformers"
  /bin/sh -c "pip freeze | grep -E google-cloud-aiplatform"
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest apache_beam/ml/transforms/embeddings -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{39,310}-TFHubEmbeddings-{014,015}]
deps =
  014: tensorflow-hub>=0.14.0,<0.15.0
  # Help pip resolve conflict with typing-extensions due to an old version of tensorboard https://github.com/apache/beam/issues/30852
  014: pydantic<2.7
  015: tensorflow-hub>=0.15.0,<0.16.0
  # Help pip resolve conflict with typing-extensions due to an old version of tensorboard https://github.com/apache/beam/issues/30852
  015: pydantic<2.7
  # tensorflow-text is required to register ops for text embedding models.
  tensorflow-text
extras = test,gcp
commands =
  # Log tensorflow version for debugging
  /bin/sh -c "pip freeze | grep -E tensorflow"
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms/embeddings'