;
;    Licensed to the Apache Software Foundation (ASF) under one or more
;    contributor license agreements.  See the NOTICE file distributed with
;    this work for additional information regarding copyright ownership.
;    The ASF licenses this file to You under the Apache License, Version 2.0
;    (the "License"); you may not use this file except in compliance with
;    the License.  You may obtain a copy of the License at
;
;       http://www.apache.org/licenses/LICENSE-2.0
;
;    Unless required by applicable law or agreed to in writing, software
;    distributed under the License is distributed on an "AS IS" BASIS,
;    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;    See the License for the specific language governing permissions and
;    limitations under the License.
;

[tox]
# All tox state lives under target/, in a subdirectory named by $ENV_NAME (default: .tox).
toxworkdir = {toxinidir}/target/{env:ENV_NAME:.tox}
# Environments run by a bare `tox` invocation. A newly added [testenv:*] section
# is excluded by default unless it is explicitly added to this list.
envlist = py39,py310,py311,py312,py313,py39-{cloud,cloudcoverage,dask},py310-{cloud,dask},py311-{cloud,dask},py312-{cloud,dask},py313-{cloud,dask},docs,lint,mypy,whitespacelint

[pycodestyle]
# Settings for the pycodestyle checker.
# Disable all errors and warnings except for the ones related to blank lines
# (the E3 group); pylint does not check the number of blank lines.
select = E3

# Shared environment options: every [testenv:*] section below starts from these
# values and replaces only the keys it defines itself.
[testenv]
# Placeholder command: `false` exits non-zero, so an environment that does not
# define its own `commands` fails instead of silently passing.
commands = false {envname} is misconfigured

# Run the tests using pre-released dependencies.
# https://github.com/apache/beam/issues/25668
pip_pre = True
# Set [] options for pip installation of apache-beam tarball.
extras = test,dataframe
# Forward TERM so that apps that support color can use it.
passenv = TERM,CLOUDSDK_CONFIG
setenv =
  RUN_SKIPPED_PY3_TESTS=0
  # Use an isolated tmp dir for tests that get slowed down by scanning /tmp.
  TMPDIR={envtmpdir}
  # Silence warning about ignoring PYTHONPATH.
  PYTHONPATH=
# Don't warn that these commands aren't installed.
allowlist_externals =
  false
  time
  bash
  rm
  /bin/sh
  curl
  ./codecov
  chmod

# These 2 magic command overrides are required for Jenkins builds.
# Otherwise we get "OSError: [Errno 2] No such file or directory" errors.
# Source:
# https://github.com/tox-dev/tox/issues/123#issuecomment-284714629
install_command = {envbindir}/python {envbindir}/pip install --retries 10 {opts} {packages}
list_dependencies_command = {envbindir}/python {envbindir}/pip freeze

# Steps run before `commands`: print tool versions, verify the installed
# packages are mutually compatible, then run the cleanup script.
commands_pre =
  python --version
  pip --version
  pip check
  bash {toxinidir}/scripts/run_tox_cleanup.sh
# The cleanup script is run again after `commands`.
commands_post =
  bash {toxinidir}/scripts/run_tox_cleanup.sh

[testenv:py{39,310,311,312,313}]
# Plain unit-test environments, one generated per listed Python version.
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
# Print tool versions, check installed packages for conflicts, run the cleanup script.
commands_pre =
  python --version
  pip --version
  pip check
  bash {toxinidir}/scripts/run_tox_cleanup.sh
# numpy is pinned to an exact version for these environments.
deps =
  numpy==1.26.4

[testenv:py{39,310,311,312,313}-macos]
# macOS variants of the unit-test environments.
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
# Same pre-steps as the shared [testenv], except that `pip check` is left
# commented out (the reason is not recorded in this file).
commands_pre =
  python --version
  pip --version
  # pip check
  bash {toxinidir}/scripts/run_tox_cleanup.sh

[testenv:py{39,310,311,312,313}-win]
# Windows variants: the install/freeze overrides from [testenv] are repeated
# with the .exe suffix on the python and pip executables.
install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py{39,310,311,312,313}-cloud]
# Unit tests with the cloud-provider and interactive extras installed.
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
extras = test,gcp,interactive,dataframe,aws,azure

[testenv:py{39,310,311}-ml]
# ML test suite for the listed Python versions.
extras = test,gcp,dataframe,ml_test
deps =
  pip==25.0.1
  accelerate>=1.6.0
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
# (An empty setenv replaces the shared one, which is where TMPDIR is set.)
setenv =
commands =
  # Log tensorflow version for debugging
  /bin/sh -c "pip freeze | grep -E tensorflow"
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py312-ml]
# many packages do not support py3.12, so this environment uses its own
# p312_ml_test extra in place of ml_test.
extras = test,gcp,dataframe,p312_ml_test
deps =
  accelerate>=1.6.0
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
commands =
  # Log tensorflow version for debugging
  /bin/sh -c "pip freeze | grep -E tensorflow"
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py{39,310,311,312,313}-dask]
# Runs only the tests under apache_beam/runners/dask/.
# The factor list must cover every py*-dask entry in [tox] envlist; an
# environment that matches no section falls back to the shared [testenv]
# command, which fails with "<envname> is misconfigured".
# The extra is spelled "dataframe", matching every other section in this file.
extras = test,dask,dataframe
# NOTE: this replaces the shared commands_pre (version printing, pip check, cleanup).
commands_pre =
  pip install 'distributed>=2024.4.2' 'dask>=2024.4.2'
commands =
  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/

[testenv:py{39,310,311,312,313}-win-dask]
# Windows variants of the Dask runner tests; python and pip carry an .exe suffix.
install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
# use the tight range since the latest dask requires cloudpickle 3.0
commands_pre =
  pip install 'distributed>=2024.4.2,<2024.9.0' 'dask>=2024.4.2,<2024.9.0'
commands =
  python apache_beam/examples/complete/autocomplete_test.py
  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/

[testenv:py39-cloudcoverage]
# Cloud test run that also collects coverage; restricted to Linux.
platform = linux
# NOTE: we could add ml_test to increase the collected code coverage metrics, but it would make the suite slower.
extras = test,gcp,interactive,dataframe,aws
deps =
  pytest-cov==3.0.0
# CI-related variables forwarded from the host environment.
passenv = GIT_*,BUILD_*,ghprb*,CHANGE_ID,BRANCH_NAME,JENKINS_*,CODECOV_*,GITHUB_*
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
  PYTHONPATH = {toxinidir}
commands =
  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append"

[testenv:lint]
# keep the version of pylint in sync with the 'rev' in .pre-commit-config.yaml
deps =
  astroid<2.17.0,>=2.15.6
  pycodestyle==2.8.0
  pylint==2.17.5
  isort==4.2.15
  flake8==4.0.1
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in pylint.
setenv =
commands =
  pylint --version
  time {toxinidir}/scripts/run_pylint.sh

[testenv:whitespacelint]
# Runs the whitespace lint script with a pinned whitespacelint.
deps =
  whitespacelint==1.1.0
# Empty on purpose: replaces the shared setenv, so none of its variables are set here.
setenv =
commands =
  time {toxinidir}/scripts/run_whitespacelint.sh

[testenv:mypy]
# Static type checking, invoked through setup.py.
# make extras available in case any of these libs are typed
extras =
  gcp
deps =
  mypy==1.13.0
  dask==2022.01.0
  distributed==2022.01.0
commands =
  mypy --version
  python setup.py mypy


[testenv:docs]
# Generates the Python API docs via scripts/generate_pydoc.sh with pinned Sphinx tooling.
deps =
  Sphinx==7.4.7
  sphinx_rtd_theme==3.0.1
  docutils>=0.18.1
  Jinja2==3.1.0
extras = test,gcp,docs,interactive,dataframe,dask
commands =
  time {toxinidir}/scripts/generate_pydoc.sh

[testenv:hdfs_integration_test]
# Used by hdfs_integration_test.sh. Do not run this directly, as it depends on
# nodes defined in hdfs_integration_test/docker-compose.yml.
extras =
  gcp
deps =
  holdup==1.8.0
passenv = HDFSCLI_CONFIG
# External (non-virtualenv) programs the commands below are allowed to call.
allowlist_externals =
  bash
  echo
  sleep
  wget
  hdfscli
# Replaces the shared commands_pre: only the dependency consistency check runs.
commands_pre =
  pip check
# Sequence: wait for the namenode/datanode HTTP endpoints, pause, download the
# sample text, upload it to HDFS, then run wordcount twice (plain hdfs:// paths,
# then full URLs with --hdfs_full_urls).
commands =
  holdup -t 45 http://namenode:50070 http://datanode:50075
  echo "Waiting for safe mode to end."
  sleep 45
  wget storage.googleapis.com/dataflow-samples/shakespeare/kinglear.txt
  hdfscli -v -v -v upload -f kinglear.txt /
  python -m apache_beam.examples.wordcount \
      --input hdfs://kinglear* \
      --output hdfs://py-wordcount-integration \
      --hdfs_host namenode --hdfs_port 50070 --hdfs_user root
  python -m apache_beam.examples.wordcount \
      --input hdfs://unused_server/kinglear* \
      --output hdfs://unused_server/py-wordcount-integration \
      --hdfs_host namenode --hdfs_port 50070 --hdfs_user root --hdfs_full_urls

[testenv:azure_integration_test]
# Used by azure/integration_test/azure_integration_test.sh.
# Do not run this directly, as it depends on nodes defined in
# azure/integration_test/docker-compose.yml.
# No extra pinned deps; only the package's azure extra is installed.
deps =
extras =
  azure
passenv = REQUESTS_CA_BUNDLE
# External (non-virtualenv) programs the commands below are allowed to call.
allowlist_externals =
  wget
  az
  bash
# Connection string pointing at the "azurite" host on port 10000.
# NOTE(review): devstoreaccount1 and this key look like the publicly documented
# Azurite emulator defaults rather than a real secret - confirm before reuse.
setenv =
  CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=https://azurite:10000/devstoreaccount1;
# Fixture setup: check dependencies, download the sample text, create the
# container and upload the file into it.
commands_pre =
  pip check
  wget storage.googleapis.com/dataflow-samples/shakespeare/kinglear.txt
  # Create container for storing files.
  az storage container create -n container --connection-string {env:CONNECTION_STRING}
  # Upload test file.
  az storage blob upload -f kinglear.txt -c container -n kinglear.txt --connection-string {env:CONNECTION_STRING}
commands =
  # Test --azure_connection_string
  python -m apache_beam.examples.wordcount \
      --input azfs://devstoreaccount1/container/* \
      --output azfs://devstoreaccount1/container/py-wordcount-integration \
      --azure_connection_string {env:CONNECTION_STRING}
  # This doesn't work because there's no way to send a fake bearer token to
  # Azurite when using DefaultAzureCredential.
  # See https://github.com/Azure/Azurite/issues/389#issuecomment-615298432
  # and https://github.com/Azure/Azurite/issues/1750#issue-1449778593
  #python -m apache_beam.examples.wordcount \
  #    --input azfs://devstoreaccount1/container/* \
  #    --output azfs://devstoreaccount1/container/py-wordcount-integration \
  #    --blob_service_endpoint https://azurite:10000/devstoreaccount1/container-name \
  #    --azure_managed_identity_client_id "abc123"

[testenv:py3-yapf]
# Reformats apache_beam in place (--in-place).
commands =
  yapf --version
  time yapf --in-place --parallel --recursive apache_beam
# keep the version of yapf in sync with the 'rev' in .pre-commit-config.yaml and pyproject.toml
deps =
  yapf==0.43.0

[testenv:py3-yapf-check]
# Formatting check: prints a diff (--diff) instead of rewriting files.
commands =
  yapf --version
  time yapf --diff --parallel --recursive apache_beam
# keep the version of yapf in sync with the 'rev' in .pre-commit-config.yaml and pyproject.toml
deps =
  yapf==0.43.0

[testenv:jest]
# Sets up Node.js, then runs the Jest script.
deps =
  jupyterlab==4.3.6
# Empty on purpose: replaces the shared setenv.
setenv =
commands =
  time {toxinidir}/scripts/setup_nodejs.sh
  time {toxinidir}/scripts/run_jest.sh

[testenv:eslint]
# Sets up Node.js, then runs the ESLint script.
deps =
  jupyterlab==4.3.6
# Empty on purpose: replaces the shared setenv.
setenv =
commands =
  time {toxinidir}/scripts/setup_nodejs.sh
  time {toxinidir}/scripts/run_eslint.sh

[testenv:flink-runner-test]
# Runs flink_runner_test.py through the shared validates-runner script.
commands =
  bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/flink_runner_test.py {posargs}
extras = test

[testenv:samza-runner-test]
# Runs samza_runner_test.py through the shared validates-runner script.
commands =
  bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/samza_runner_test.py {posargs}
extras = test

[testenv:spark-runner-test]
# Runs spark_runner_test.py through the shared validates-runner script.
commands =
  bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/spark_runner_test.py {posargs}
extras = test

[testenv:prism-runner-test]
# Runs prism_runner_test.py through the shared validates-runner script.
commands =
  bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/prism_runner_test.py {posargs}
extras = test

[testenv:py{39,310}-pyarrow-{3,9,10,11,12,13,14,15,16,17,18}]
# One environment per (Python version, pyarrow major version) pair. Only tests
# marked uses_pyarrow are selected.
commands =
  # Log pyarrow and numpy version for debugging
  /bin/sh -c "pip freeze | grep -E '(pyarrow|numpy)'"
  # Run pytest directly rather using run_pytest.sh. It doesn't handle
  # selecting tests with -m (BEAM-12985).
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pyarrow {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
# Lines prefixed "N:" apply only to the environment whose name carries factor N;
# unprefixed lines apply to every environment generated from this section.
deps =
  # As a courtesy to users, test against the oldest allowed version of Pyarrow.
  # We'd have to increase the pyarrow lower bound when Python 3.9 is deprecated.
  # Since Pandas 2 requires pyarrow>=7, downgrade pandas for this test.
  3: pyarrow>=3,<4
  3: pandas<2
  3: numpy>=1.14.3,<1.27.0
  # Test against versions of pyarrow released in last ~2 years.
  9: pyarrow>=9,<10
  9: pandas==2.1.4
  10: pyarrow>=10,<11
  11: pyarrow>=11,<12
  12: pyarrow>=12,<13
  13: pyarrow>=13,<14
  14: pyarrow>=14,<15
  15: pyarrow>=15,<16
  16: pyarrow>=16,<17
  17: pyarrow>=17,<18
  18: pyarrow>=18,<19
  numpy==1.26.4


[testenv:py{39,310}-pandas-{14,15,20}]
# DataFrame API tests against several pandas release lines
# (factor 14 -> 1.4.x, 15 -> 1.5.x, 20 -> 2.0.x).
commands =
  # Log pandas and numpy version for debugging
  /bin/sh -c "pip freeze | grep -E '(pandas|numpy)'"
  # Run all DataFrame API unit tests
  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/dataframe'
deps =
  14: pandas>=1.4.3,<1.5.0
  14: numpy>=1.14.3,<1.27.0
  # Exclude 1.5.0 and 1.5.1 because of https://github.com/pandas-dev/pandas/issues/45725
  15: pandas>=1.5.2,<1.6.0
  15: numpy>=1.14.3,<1.27.0
  20: pandas>=2.0.0,<2.1.0
  20: pyarrow>=7
  20: numpy>=1.14.3,<1.27.0

[testenv:py{39,310}-tft-{113,114}]
# Runs the ML transforms tests and the MLTransform snippet test.
# Lines prefixed "113:" / "114:" apply only to the environment carrying that factor.
deps =
  # Help pip resolve conflict with typing-extensions due to an old version of tensorflow https://github.com/apache/beam/issues/30852
  113: pydantic<2.0
  114: tensorflow_transform>=1.14.0,<1.15.0
commands =
  # Both test paths are passed to run_pytest.sh as a single quoted argument.
  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms apache_beam/examples/snippets/transforms/elementwise/mltransform_test.py'

[testenv:py{39,310}-pytorch-{19,110,111,112,113}]
# PyTorch 1.x matrix: each factor selects one torch minor release line.
extras = test,gcp
deps =
  19: torch>=1.9.0,<1.10.0
  110: torch>=1.10.0,<1.11.0
  111: torch>=1.11.0,<1.12.0
  112: torch>=1.12.0,<1.13.0
  113: torch>=1.13.0,<1.14.0
  numpy==1.26.4
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
commands =
  # Log torch version for debugging
  /bin/sh -c "pip freeze | grep -E torch"
  # Run all PyTorch<2 unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{39,310}-pytorch-200]
# PyTorch 2.0.x environment.
extras = test,gcp
# The indented lines under "200:" are the requirements applied for factor 200.
deps =
  200:
    torch>=2.0.0,<2.1.0
    mpmath==1.3.0
    numpy==1.26.4
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
commands =
  # Log torch version for debugging
  /bin/sh -c "pip freeze | grep -E torch"
  # Run all PyTorch>=2  unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

# TODO(https://github.com/apache/beam/issues/25796) - uncomment onnx tox task in tox/py39/build.gradle once onnx supports protobuf 4.x.x
[testenv:py{39,310}-onnx-113]
# TODO(https://github.com/apache/beam/issues/25443)
# apparently tox has problem when substitution key has single value. Change back to -onnx-{113,...}
# when multiple onnx versions are tested.
extras = test,gcp
deps =
  onnxruntime==1.13.1
  pandas==1.5.2
  torch==1.13.1
  tensorflow==2.11.0
  tf2onnx==1.13.0
  skl2onnx==1.13
  transformers==4.25.1
commands =
  # Log onnx version for debugging
  /bin/sh -c "pip freeze | grep -E onnx"
  # Run all ONNX unit tests
  # NOTE: unlike the sibling ML sections, pytest's exit code is not remapped
  # here, so exit code 5 (no tests collected) fails this environment.
  pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs}

[testenv:py39-tensorflow-212]
# TensorFlow 2.12 environment (Python 3.9 only).
extras = test,gcp
deps =
  212:
    tensorflow>=2.12rc1,<2.13
    # Help pip resolve conflict with typing-extensions for old version of TF https://github.com/apache/beam/issues/30852
    pydantic<2.7
# Replaces the shared commands_pre; forces a specific protobuf version before the tests run.
commands_pre =
  pip install -U 'protobuf==4.25.5'
commands =
  # Log tensorflow version for debugging
  /bin/sh -c "pip freeze | grep -E tensorflow"
  # Run all Tensorflow unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_tf {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py39-xgboost-{160,170}]
# XGBoost matrix: factor 160 pins the 1.6.x line, factor 170 allows 1.7.0 and newer.
extras = test,gcp
deps =
  160:
    xgboost>=1.6.0,<1.7.0
    datatable==1.0.0
  170:
    xgboost>=1.7.0
    datatable==1.0.0
commands =
  # Log XGBoost version for debugging
  /bin/sh -c "pip freeze | grep -E xgboost"
  # Run all XGBoost unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_xgboost {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{39,310}-transformers-{428,447,448,latest}]
# Hugging Face transformers matrix; the trailing factor selects the version range.
extras = test,gcp,ml_test
# Factor-prefixed lines apply to one version only; the unprefixed pins at the
# end apply to every environment generated from this section.
deps =
  # sentence-transformers 2.2.2 is the latest version that supports transformers 4.28.x
  428: sentence-transformers==2.2.2
  428: transformers>=4.28.0,<4.29.0
  428: torch>=1.9.0,<1.14.0
  447: transformers>=4.47.0,<4.48.0
  447: torch>=1.9.0,<1.14.0
  448: transformers>=4.48.0,<4.49.0
  448: torch>=2.0.0,<2.1.0
  latest: transformers>=4.48.0
  latest: torch>=2.0.0
  latest: accelerate>=1.6.0
  tensorflow==2.12.0
  protobuf==4.25.5
  pip==25.0.1
commands =
  # Log transformers and its dependencies version for debugging
  /bin/sh -c "pip freeze | grep -E transformers"
  /bin/sh -c "pip freeze | grep -E torch"
  /bin/sh -c "pip freeze | grep -E tensorflow"
  # Run all Transformers unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_transformers {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{39,310}-vertex-ai]
# Vertex AI tests (pytest marker: uses_vertex_ai).
# Only Python versions for which the pinned tensorflow release is published
# are listed: tensorflow==2.12.0 has no Python 3.12 distribution, so a py312
# environment could never finish installing its deps.
deps =
  tensorflow==2.12.0
extras = test,gcp
commands =
  # Log aiplatform and its dependencies version for debugging
  /bin/sh -c "pip freeze | grep -E google-cloud-aiplatform"
  /bin/sh -c "pip freeze | grep -E tensorflow"
  # Run all Vertex AI unit tests
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_vertex_ai {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'


[testenv:py{39,310}-embeddings]
# Runs the tests under apache_beam/ml/transforms/embeddings.
extras = test,gcp
# Forwarded from the host environment.
passenv = HF_INFERENCE_TOKEN
deps =
  sentence-transformers==3.3.1
  accelerate>=1.6.0
commands =
  # Log sentence-transformers and aiplatform versions for debugging
  /bin/sh -c "pip freeze | grep -E sentence-transformers"
  /bin/sh -c "pip freeze | grep -E google-cloud-aiplatform"
  # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
  /bin/sh -c 'pytest apache_beam/ml/transforms/embeddings -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'


[testenv:py{39,310}-TFHubEmbeddings-{014,015}]
# TensorFlow Hub embedding tests; the trailing factor selects the tensorflow-hub release line.
extras = test,gcp
deps =
  014: tensorflow-hub>=0.14.0,<0.15.0
  # Help pip resolve conflict with typing-extensions due to an old version of tensorboard https://github.com/apache/beam/issues/30852
  014: pydantic<2.7
  015: tensorflow-hub>=0.15.0,<0.16.0
  # Help pip resolve conflict with typing-extensions due to an old version of tensorboard https://github.com/apache/beam/issues/30852
  015: pydantic<2.7
  # required to register ops for text embedding models.
  tensorflow-text
commands =
  # Log the versions of tensorflow-related packages for debugging
  /bin/sh -c "pip freeze | grep -E tensorflow"
  # Run the embeddings transform tests through the shared pytest wrapper script.
  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms/embeddings'