From 73890e703acb54370405fd404819769bcfbfc45b Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Tue, 29 Apr 2025 11:13:17 +0100 Subject: [PATCH 1/8] Update [ghstack-poisoned] --- .github/scripts/pre-build-script-win.sh | 4 - .../linux_libs/scripts_gym/setup_env.sh | 2 +- .../windows_optdepts/scripts/unittest.sh | 5 +- .github/workflows/benchmarks.yml | 10 +- .github/workflows/benchmarks_pr.yml | 10 +- .github/workflows/nightly_build.yml | 174 ++++++++++++------ .github/workflows/test-linux.yml | 1 + test/test_collector.py | 1 + 8 files changed, 138 insertions(+), 69 deletions(-) diff --git a/.github/scripts/pre-build-script-win.sh b/.github/scripts/pre-build-script-win.sh index d30410431b9..2af85dc90a0 100644 --- a/.github/scripts/pre-build-script-win.sh +++ b/.github/scripts/pre-build-script-win.sh @@ -3,7 +3,3 @@ pip install --upgrade setuptools export TORCHRL_BUILD_VERSION=0.8.0 - -${CONDA_RUN} pip install "pybind11[global]" -${CONDA_RUN} conda install anaconda::cmake -y -${CONDA_RUN} pip install git+https://github.com/pytorch/tensordict.git -U diff --git a/.github/unittest/linux_libs/scripts_gym/setup_env.sh b/.github/unittest/linux_libs/scripts_gym/setup_env.sh index 420f2099055..533d892376a 100755 --- a/.github/unittest/linux_libs/scripts_gym/setup_env.sh +++ b/.github/unittest/linux_libs/scripts_gym/setup_env.sh @@ -92,7 +92,7 @@ conda env config vars set \ # make env variables apparent conda deactivate && conda activate "${env_dir}" -pip install pip --upgrade +# pip install pip --upgrade conda env update --file "${this_dir}/environment.yml" --prune #conda install -c conda-forge fltk -y diff --git a/.github/unittest/windows_optdepts/scripts/unittest.sh b/.github/unittest/windows_optdepts/scripts/unittest.sh index f8adfe696f2..fc96b748db8 100755 --- a/.github/unittest/windows_optdepts/scripts/unittest.sh +++ b/.github/unittest/windows_optdepts/scripts/unittest.sh @@ -93,9 +93,12 @@ fi # install tensordict if [[ "$RELEASE" == 0 ]]; then + conda install 
anaconda::cmake -y + git clone https://github.com/pytorch/tensordict cd tensordict - python setup.py develop + pip install -e . + cd .. else pip3 install tensordict diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index f90b02055fe..a39458573b2 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -23,7 +23,7 @@ jobs: benchmark_cpu: name: CPU Pytest benchmark - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Who triggered this? run: | @@ -41,7 +41,12 @@ jobs: python3.10 -m venv ./py310 source ./py310/bin/activate + apt-get update -y + apt-get upgrade -y + apt-get -y install cmake + python3 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U + python3 -m pip install "pybind11[global]" python3 -m pip install git+https://github.com/pytorch/tensordict python3 setup.py develop python3 -m pip install pytest pytest-benchmark @@ -121,7 +126,8 @@ jobs: export PYTHON_INCLUDE_DIR=/usr/include/python3.10 python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 -U - python3.10 -m pip install cmake ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]" + python3.10 -m pip install ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]" + python3 -m pip install "pybind11[global]" python3.10 -m pip install git+https://github.com/pytorch/tensordict python3.10 setup.py develop diff --git a/.github/workflows/benchmarks_pr.yml b/.github/workflows/benchmarks_pr.yml index e6e063c340f..4f0584844f7 100644 --- a/.github/workflows/benchmarks_pr.yml +++ b/.github/workflows/benchmarks_pr.yml @@ -14,7 +14,7 @@ jobs: benchmark_cpu: name: CPU Pytest benchmark - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Who triggered this? 
run: | @@ -39,7 +39,12 @@ jobs: python3.10 -m venv ./py310 source ./py310/bin/activate + apt-get update -y + apt-get upgrade -y + apt-get -y install cmake + python3 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U + python3 -m pip install "pybind11[global]" python3 -m pip install git+https://github.com/pytorch/tensordict python3 setup.py develop python3 -m pip install pytest pytest-benchmark @@ -130,7 +135,8 @@ jobs: export PYTHON_INCLUDE_DIR=/usr/include/python3.10 python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 -U - python3.10 -m pip install cmake ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]" + python3.10 -m pip install ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]" + python3.10 -m pip install "pybind11[global]" python3.10 -m pip install git+https://github.com/pytorch/tensordict python3.10 setup.py develop # python3.10 -m pip install git+https://github.com/pytorch/rl@$GITHUB_BRANCH diff --git a/.github/workflows/nightly_build.yml b/.github/workflows/nightly_build.yml index 37c82fd8514..999c2f1aa39 100644 --- a/.github/workflows/nightly_build.yml +++ b/.github/workflows/nightly_build.yml @@ -4,6 +4,7 @@ name: Push Binary Nightly on: + pull_request: workflow_call: secrets: # AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID: @@ -12,9 +13,9 @@ on: # required: true PYPI_TOKEN: required: false - # run every day at 11:15am + # run every day at 15:15 UTC schedule: - - cron: '15 11 * * *' + - cron: '15 15 * * *' # or manually trigger it workflow_dispatch: push: @@ -32,21 +33,27 @@ permissions: contents: read jobs: - build-wheel-linux: + build-wheel-unix: # Don't run on forked repos. 
if: github.repository_owner == 'pytorch' - runs-on: ubuntu-20.04 + continue-on-error: true + runs-on: ${{ matrix.os[1] }} strategy: matrix: - python_version: [["3.9", "cp39-cp39"], ["3.10", "cp310-cp310"], ["3.11", "cp311-cp311"], ["3.12", "cp312-cp312"]] + os: [['linux', 'ubuntu-22.04'], ['macos', 'macos-latest']] + python_version: [ + ["3.9", "cp39-cp39"], + ["3.10", "cp310-cp310"], + ["3.11", "cp311-cp311"], + ["3.12", "cp312-cp312"], + ["3.13", "cp313-cp313"], + ] cuda_support: [["", "cpu", "cpu"]] steps: - name: Checkout torchrl uses: actions/checkout@v4 - env: - AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache" - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python_version[0] }} - name: Install PyTorch nightly @@ -64,67 +71,35 @@ jobs: find dist -name '*whl' -exec bash -c ' mv $0 ${0/linux/manylinux1}' {} \; # pytorch/pytorch binaries are also manylinux_2_17 compliant but they # pretend that they're manylinux1 compliant so we do the same. - - name: Show auditwheel output; confirm 2-17 - run: | - python3 -mpip install auditwheel - auditwheel show dist/* - name: Upload wheel for the test-wheel job uses: actions/upload-artifact@v4 with: - name: torchrl-linux-${{ matrix.python_version[0] }}_${{ matrix.cuda_support[2] }}.whl + name: torchrl-${{ matrix.os[0] }}-${{ matrix.python_version[0] }}_${{ matrix.cuda_support[2] }}.whl path: dist/*.whl - upload-wheel-linux: - # Don't run on forked repos. 
- if: github.repository_owner == 'pytorch' - needs: test-wheel-linux - runs-on: ubuntu-20.04 - strategy: - matrix: - python_version: [["3.9", "cp39-cp39"], ["3.10", "cp310-cp310"], ["3.11", "cp311-cp311"], ["3.12", "cp312-cp312"]] - cuda_support: [["", "cpu", "cpu"]] - steps: - - name: Checkout torchrl - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python_version[0] }} - - name: Download built wheels - uses: actions/download-artifact@v4 - with: - name: torchrl-linux-${{ matrix.python_version[0] }}_${{ matrix.cuda_support[2] }}.whl - path: /tmp/wheels - - name: Push TorchRL Binary to PYPI - env: - PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: | - export PATH="/opt/python/${{ matrix.python_version[1] }}/bin:$PATH" - python3 -mpip install twine - python -m twine upload \ - --username __token__ \ - --password "$PYPI_TOKEN" \ - --skip-existing \ - /tmp/wheels/torchrl_nightly-*.whl \ - --verbose - test-wheel-linux: + test-wheel-unix: # Don't run on forked repos. 
if: github.repository_owner == 'pytorch' - needs: build-wheel-linux - runs-on: ubuntu-20.04 + needs: build-wheel-unix + continue-on-error: true + runs-on: ${{ matrix.os[1] }} strategy: matrix: - python_version: [["3.9", "cp39-cp39"], ["3.10", "cp310-cp310"], ["3.11", "cp311-cp311"], ["3.12", "cp312-cp312"]] + os: [['linux', 'ubuntu-22.04'], ['macos', 'macos-latest']] + python_version: [ + ["3.9", "cp39-cp39"], + ["3.10", "cp310-cp310"], + ["3.11", "cp311-cp311"], + ["3.12", "cp312-cp312"], + ["3.13", "cp313-cp313"], + ] cuda_support: [["", "cpu", "cpu"]] steps: - name: Setup Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python_version[0] }} - architecture: x64 - env: - AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache" - name: Checkout torchrl uses: actions/checkout@v4 - name: Install PyTorch Nightly @@ -137,6 +112,13 @@ jobs: python3 -mpip install --upgrade pip - name: Install tensordict run: | + if [[ "$OSTYPE" == "darwin"* ]]; then + brew install cmake + else + sudo apt-get update + sudo apt-get install -y cmake + fi + python3 -mpip install "pybind11[global]" python3 -mpip install git+https://github.com/pytorch/tensordict.git - name: Install test dependencies run: | @@ -145,7 +127,7 @@ jobs: - name: Download built wheels uses: actions/download-artifact@v4 with: - name: torchrl-linux-${{ matrix.python_version[0] }}_${{ matrix.cuda_support[2] }}.whl + name: torchrl-${{ matrix.os[0] }}-${{ matrix.python_version[0] }}_${{ matrix.cuda_support[2] }}.whl path: /tmp/wheels env: AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache" @@ -171,16 +153,67 @@ jobs: pytest test/smoke_test.py -v --durations 200 exit $EXIT_STATUS + upload-wheel-unix: + # Don't run on forked repos. 
+ if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') }} + needs: test-wheel-unix + runs-on: ${{ matrix.os[1] }} + continue-on-error: true + strategy: + matrix: + os: [['linux', 'ubuntu-22.04'], ['macos', 'macos-latest']] + python_version: [ + ["3.9", "cp39-cp39"], + ["3.10", "cp310-cp310"], + ["3.11", "cp311-cp311"], + ["3.12", "cp312-cp312"], + ["3.13", "cp313-cp313"], + ] + cuda_support: [["", "cpu", "cpu"]] + steps: + - name: Checkout torchrl + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version[0] }} + - name: Download built wheels + uses: actions/download-artifact@v4 + with: + name: torchrl-${{ matrix.os[0] }}-${{ matrix.python_version[0] }}_${{ matrix.cuda_support[2] }}.whl + path: /tmp/wheels + - name: Push TorchRL Binary to PYPI + env: + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + run: | + export PATH="/opt/python/${{ matrix.python_version[1] }}/bin:$PATH" + python3 -mpip install twine + python -m twine upload \ + --username __token__ \ + --password "$PYPI_TOKEN" \ + --skip-existing \ + /tmp/wheels/torchrl_nightly-*.whl \ + --verbose + + + build-wheel-windows: # Don't run on forked repos. if: github.repository_owner == 'pytorch' runs-on: windows-latest + continue-on-error: true strategy: matrix: - python_version: [["3.9", "3.9"], ["3.10", "3.10.3"], ["3.11", "3.11"], ["3.12", "3.12"]] + python_version: [ + ["3.9", "3.9"], + ["3.10", "3.10.3"], + ["3.11", "3.11"], + ["3.12", "3.12"], + ["3.13", "3.13"], + ] steps: - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v2 with: python-version: ${{ matrix.python_version[1] }} - name: Checkout torchrl @@ -207,10 +240,17 @@ jobs: # Don't run on forked repos. 
if: github.repository_owner == 'pytorch' needs: build-wheel-windows + continue-on-error: true runs-on: windows-latest strategy: matrix: - python_version: [["3.9", "3.9"], ["3.10", "3.10.3"], ["3.11", "3.11"], ["3.12", "3.12"]] + python_version: [ + ["3.9", "3.9"], + ["3.10", "3.10.3"], + ["3.11", "3.11"], + ["3.12", "3.12"], + ["3.13", "3.13"], + ] steps: - name: Setup Python uses: actions/setup-python@v5 @@ -231,7 +271,16 @@ jobs: run: | python3 -mpip install numpy pytest --no-cache-dir - name: Install tensordict + shell: bash run: | + if ! choco -v &> /dev/null; then + powershell -NoProfile -InputFormat None -ExecutionPolicy Bypass -Command "iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))" + fi + # Install cmake using Chocolatey + choco install cmake -y + # Install necessary Python packages + python -m pip install --upgrade pip + python -m pip install "pybind11[global]" python3 -mpip install git+https://github.com/pytorch/tensordict.git - name: Download built wheels uses: actions/download-artifact@v4 @@ -262,12 +311,19 @@ jobs: upload-wheel-windows: # Don't run on forked repos. 
- if: github.repository_owner == 'pytorch' + if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') }} needs: test-wheel-windows + continue-on-error: true runs-on: windows-latest strategy: matrix: - python_version: [["3.9", "3.9"], ["3.10", "3.10.3"], ["3.11", "3.11"], ["3.12", "3.12"]] + python_version: [ + ["3.9", "3.9"], + ["3.10", "3.10.3"], + ["3.11", "3.11"], + ["3.12", "3.12"], + ["3.13", "3.13"], + ] steps: - name: Checkout torchrl uses: actions/checkout@v4 diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml index 717214c30ff..3d655f9ae84 100644 --- a/.github/workflows/test-linux.yml +++ b/.github/workflows/test-linux.yml @@ -96,6 +96,7 @@ jobs: matrix: python_version: ["3.9"] cuda_arch_version: ["11.6"] + fail-fast: false uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu diff --git a/test/test_collector.py b/test/test_collector.py index 9b6856bdbf9..d3c157a5810 100644 --- a/test/test_collector.py +++ b/test/test_collector.py @@ -3510,6 +3510,7 @@ def all_worker_ids(self) -> list[int] | list[torch.device]: return list(range(self.num_workers)) @pytest.mark.skipif(not _has_cuda, reason="requires cuda another device than CPU.") + @pytest.mark.skipif(not _has_gym, reason="requires gym") def test_weight_update(self): device = "cuda:0" env_maker = lambda: GymEnv("Pendulum-v1", device="cpu") From 4caaeddf01ae817f83b751b29f466afa71491b88 Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Tue, 29 Apr 2025 11:21:29 +0100 Subject: [PATCH 2/8] Update [ghstack-poisoned] --- .github/unittest/windows_optdepts/scripts/environment.yml | 1 - .github/unittest/windows_optdepts/scripts/unittest.sh | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/unittest/windows_optdepts/scripts/environment.yml b/.github/unittest/windows_optdepts/scripts/environment.yml index 2b1abf1650a..2740c77f434 100644 --- 
a/.github/unittest/windows_optdepts/scripts/environment.yml +++ b/.github/unittest/windows_optdepts/scripts/environment.yml @@ -13,7 +13,6 @@ dependencies: - pytest-instafail - pytest-rerunfailures - expecttest - - pybind11[global] - pyyaml - scipy - coverage diff --git a/.github/unittest/windows_optdepts/scripts/unittest.sh b/.github/unittest/windows_optdepts/scripts/unittest.sh index fc96b748db8..4e2d201daa3 100755 --- a/.github/unittest/windows_optdepts/scripts/unittest.sh +++ b/.github/unittest/windows_optdepts/scripts/unittest.sh @@ -95,6 +95,8 @@ fi if [[ "$RELEASE" == 0 ]]; then conda install anaconda::cmake -y + python -m pip install "pybind11[global]" + git clone https://github.com/pytorch/tensordict cd tensordict pip install -e . From 4a7b9d59fce59e3427e5432dd066a2646a9286e2 Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Tue, 29 Apr 2025 11:22:07 +0100 Subject: [PATCH 3/8] Update [ghstack-poisoned] --- .github/unittest/linux_libs/scripts_gym/setup_env.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/unittest/linux_libs/scripts_gym/setup_env.sh b/.github/unittest/linux_libs/scripts_gym/setup_env.sh index 533d892376a..f42bcb433ab 100755 --- a/.github/unittest/linux_libs/scripts_gym/setup_env.sh +++ b/.github/unittest/linux_libs/scripts_gym/setup_env.sh @@ -12,8 +12,7 @@ this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" apt-get update && apt-get install -y git wget gcc g++ apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0 -apt-get install -y libegl-dev libegl -apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb +apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev git config --global --add safe.directory '*' root_dir="$(git rev-parse --show-toplevel)" From fcbeddb594cc5ff593fa858015c633c8f3f428b7 Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Tue, 29 Apr 2025 11:29:17 +0100 Subject: [PATCH 4/8] Update 
[ghstack-poisoned] --- .../windows_optdepts/scripts/unittest.sh | 6 +- .github/workflows/benchmarks.yml | 56 ++++++++++++---- .github/workflows/benchmarks_pr.yml | 66 +++++++++++++++---- 3 files changed, 97 insertions(+), 31 deletions(-) diff --git a/.github/unittest/windows_optdepts/scripts/unittest.sh b/.github/unittest/windows_optdepts/scripts/unittest.sh index 4e2d201daa3..f115c231388 100755 --- a/.github/unittest/windows_optdepts/scripts/unittest.sh +++ b/.github/unittest/windows_optdepts/scripts/unittest.sh @@ -97,11 +97,7 @@ if [[ "$RELEASE" == 0 ]]; then python -m pip install "pybind11[global]" - git clone https://github.com/pytorch/tensordict - cd tensordict - pip install -e . - - cd .. + python -m pip install git+https://github.com/pytorch/tensordict else pip3 install tensordict fi diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index a39458573b2..e43732ec23c 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -23,11 +23,19 @@ jobs: benchmark_cpu: name: CPU Pytest benchmark - runs-on: ubuntu-22.04 + runs-on: linux.g5.4xlarge.nvidia.cpu + defaults: + run: + shell: bash -l {0} + container: + image: nvidia/cuda:12.3.0-base-ubuntu22.04 + options: --cpus all steps: - name: Who triggered this? 
run: | echo "Action triggered by ${{ github.event.pull_request.html_url }}" + - name: Check ldd --version + run: ldd --version - name: Checkout uses: actions/checkout@v4 with: @@ -38,29 +46,54 @@ jobs: python-version: '3.10' - name: Setup Environment run: | - python3.10 -m venv ./py310 - source ./py310/bin/activate - + export TZ=Europe/London + export DEBIAN_FRONTEND=noninteractive # tzdata bug + apt-get update -y + apt-get install software-properties-common cmake -y + add-apt-repository ppa:git-core/candidate -y apt-get update -y apt-get upgrade -y - apt-get -y install cmake + apt-get -y install libglu1-mesa libgl1-mesa-glx libosmesa6 gcc curl g++ unzip wget libglfw3-dev libgles2-mesa-dev libglew-dev sudo git cmake libz-dev libpython3.10-dev + - name: Setup git + run: git config --global --add safe.directory /__w/rl/rl + - name: setup Path + run: | + echo /usr/local/bin >> $GITHUB_PATH + - name: Setup benchmarks + run: | + echo "BASE_SHA=$(echo ${{ github.event.pull_request.base.sha }} | cut -c1-8)" >> $GITHUB_ENV + echo "HEAD_SHA=$(echo ${{ github.event.pull_request.head.sha }} | cut -c1-8)" >> $GITHUB_ENV + echo "BASELINE_JSON=$(mktemp)" >> $GITHUB_ENV + echo "CONTENDER_JSON=$(mktemp)" >> $GITHUB_ENV + echo "PR_COMMENT=$(mktemp)" >> $GITHUB_ENV + - name: Run + run: | + python3.10 -m venv --system-site-packages ./py310 + source ./py310/bin/activate + export PYTHON_INCLUDE_DIR=/usr/include/python3.10 - python3 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U + python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 -U + python3.10 -m pip install ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]" python3 -m pip install "pybind11[global]" - python3 -m pip install git+https://github.com/pytorch/tensordict - python3 setup.py develop - python3 -m pip install pytest pytest-benchmark - python3 -m pip install "gym[accept-rom-license,atari]" - python3 -m 
pip install "dm_control" "mujoco" + python3.10 -m pip install git+https://github.com/pytorch/tensordict + python3.10 setup.py develop + + # test import + python3 -c """import torch + assert torch.cuda.device_count() + """ cd benchmarks/ export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1 export COMPOSITE_LP_AGGREGATE=0 + export CUDA_VISIBLE_DEVICES= export TD_GET_DEFAULTS_TO_NONE=1 python3 -m pytest -vvv --rank 0 --benchmark-json output.json --ignore test_collectors_benchmark.py - name: Store benchmark results uses: benchmark-action/github-action-benchmark@v1 if: ${{ github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch' }} + env: + GIT_WORK_TREE: /__w/rl/rl with: name: CPU Benchmark Results tool: 'pytest' @@ -73,7 +106,6 @@ jobs: gh-pages-branch: gh-pages auto-push: true - benchmark_gpu: name: GPU Pytest benchmark runs-on: linux.g5.4xlarge.nvidia.gpu diff --git a/.github/workflows/benchmarks_pr.yml b/.github/workflows/benchmarks_pr.yml index 4f0584844f7..ee564572257 100644 --- a/.github/workflows/benchmarks_pr.yml +++ b/.github/workflows/benchmarks_pr.yml @@ -14,11 +14,30 @@ jobs: benchmark_cpu: name: CPU Pytest benchmark - runs-on: ubuntu-22.04 + runs-on: linux.g5.4xlarge.nvidia.cpu + defaults: + run: + shell: bash -l {0} + container: + image: nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 + options: --cpus all steps: + - name: Set GITHUB_BRANCH environment variable + run: | + if [ "${{ github.event_name }}" == "push" ]; then + export GITHUB_BRANCH=${{ github.event.branch }} + elif [ "${{ github.event_name }}" == "pull_request" ]; then + export GITHUB_BRANCH=${{ github.event.pull_request.head.ref }} + else + echo "Unsupported event type" + exit 1 + fi + echo "GITHUB_BRANCH=$GITHUB_BRANCH" >> $GITHUB_ENV - name: Who triggered this? 
run: | echo "Action triggered by ${{ github.event.pull_request.html_url }}" + - name: Check ldd --version + run: ldd --version - name: Checkout uses: actions/checkout@v4 with: @@ -27,6 +46,21 @@ jobs: uses: actions/setup-python@v4 with: python-version: '3.10' + - name: Setup Environment + run: | + export TZ=Europe/London + export DEBIAN_FRONTEND=noninteractive # tzdata bug + apt-get update -y + apt-get install software-properties-common cmake -y + add-apt-repository ppa:git-core/candidate -y + apt-get update -y + apt-get upgrade -y + apt-get -y install libglu1-mesa libgl1-mesa-glx libosmesa6 gcc curl g++ unzip wget libglfw3-dev libgles2-mesa-dev libglew-dev sudo git cmake libz-dev libpython3.10-dev + - name: Setup git + run: git config --global --add safe.directory /__w/rl/rl + - name: setup Path + run: | + echo /usr/local/bin >> $GITHUB_PATH - name: Setup benchmarks run: | echo "BASE_SHA=$(echo ${{ github.event.pull_request.base.sha }} | cut -c1-8)" >> $GITHUB_ENV @@ -34,26 +68,28 @@ jobs: echo "BASELINE_JSON=$(mktemp)" >> $GITHUB_ENV echo "CONTENDER_JSON=$(mktemp)" >> $GITHUB_ENV echo "PR_COMMENT=$(mktemp)" >> $GITHUB_ENV - - name: Setup Environment and tests + - name: Run run: | - python3.10 -m venv ./py310 + python3.10 -m venv --system-site-packages ./py310 source ./py310/bin/activate + export PYTHON_INCLUDE_DIR=/usr/include/python3.10 - apt-get update -y - apt-get upgrade -y - apt-get -y install cmake + python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 -U + python3.10 -m pip install ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]" + python3.10 -m pip install "pybind11[global]" + python3.10 -m pip install git+https://github.com/pytorch/tensordict + python3.10 setup.py develop + # python3.10 -m pip install git+https://github.com/pytorch/rl@$GITHUB_BRANCH + + # test import + python3 -c """import torch + assert torch.cuda.device_count() + """ - python3 -m pip install --pre 
torch --index-url https://download.pytorch.org/whl/nightly/cpu -U - python3 -m pip install "pybind11[global]" - python3 -m pip install git+https://github.com/pytorch/tensordict - python3 setup.py develop - python3 -m pip install pytest pytest-benchmark - python3 -m pip install "gym[accept-rom-license,atari]" - python3 -m pip install "dm_control" "mujoco" - cd benchmarks/ export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1 export COMPOSITE_LP_AGGREGATE=0 + export CUDA_VISIBLE_DEVICES= export TD_GET_DEFAULTS_TO_NONE=1 RUN_BENCHMARK="python3 -m pytest -vvv --rank 0 --ignore test_collectors_benchmark.py --benchmark-json " git checkout ${{ github.event.pull_request.base.sha }} @@ -62,6 +98,8 @@ jobs: $RUN_BENCHMARK ${{ env.CONTENDER_JSON }} - name: Publish results uses: apbard/pytest-benchmark-commenter@v3 + env: + GIT_WORK_TREE: /__w/rl/rl with: token: ${{ secrets.GITHUB_TOKEN }} benchmark-file: ${{ env.CONTENDER_JSON }} From 0d6d17df397ec92facf7b935e80828d5314de9f1 Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Tue, 29 Apr 2025 11:48:09 +0100 Subject: [PATCH 5/8] Update [ghstack-poisoned] --- .github/scripts/td_script.sh | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/.github/scripts/td_script.sh b/.github/scripts/td_script.sh index be569289aed..6cf8dabf672 100644 --- a/.github/scripts/td_script.sh +++ b/.github/scripts/td_script.sh @@ -9,14 +9,9 @@ ARCH=${ARCH:-} # This sets ARCH to an empty string if it's not defined if pip list | grep -q torch; then echo "Torch is installed." - ${CONDA_RUN} pip install "pybind11[global]" + # ${CONDA_RUN} conda install 'anaconda::cmake>=3.22' -y - if conda list cmake | grep -q 'cmake'; then - echo "CMake is already installed." - else - echo "CMake is not installed. Installing now..." 
- ${CONDA_RUN} conda install anaconda::cmake -y --no-update-deps - fi + ${CONDA_RUN} pip install "pybind11[global]" ${CONDA_RUN} pip install git+https://github.com/pytorch/tensordict.git -U --no-deps elif [[ -n "${SMOKE_TEST_SCRIPT:-}" ]]; then @@ -24,14 +19,10 @@ elif [[ -n "${SMOKE_TEST_SCRIPT:-}" ]]; then # TODO: revert when nightlies of tensordict are fixed # if [[ "$ARCH" == "aarch64" ]]; then - ${CONDA_RUN} pip install "pybind11[global]" - if conda list cmake | grep -q 'cmake'; then - echo "CMake is already installed." - else - echo "CMake is not installed. Installing now..." - ${CONDA_RUN} conda install anaconda::cmake -y --no-update-deps - fi +# ${CONDA_RUN} conda install 'anaconda::cmake>=3.22' -y + + ${CONDA_RUN} pip install "pybind11[global]" ${CONDA_RUN} pip install git+https://github.com/pytorch/tensordict.git -U --no-deps else From f62c06503b177c6db4e6ae448faae4e0f5412245 Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Tue, 29 Apr 2025 13:10:37 +0100 Subject: [PATCH 6/8] Update [ghstack-poisoned] --- .github/unittest/linux_libs/scripts_gym/setup_env.sh | 2 +- .../unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh | 8 ++++---- test/test_storage_map.py | 4 +++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/unittest/linux_libs/scripts_gym/setup_env.sh b/.github/unittest/linux_libs/scripts_gym/setup_env.sh index f42bcb433ab..6c9d73defd6 100755 --- a/.github/unittest/linux_libs/scripts_gym/setup_env.sh +++ b/.github/unittest/linux_libs/scripts_gym/setup_env.sh @@ -12,7 +12,7 @@ this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" apt-get update && apt-get install -y git wget gcc g++ apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0 -apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev +apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev git config --global --add safe.directory '*' root_dir="$(git 
rev-parse --show-toplevel)" diff --git a/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh b/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh index 20fc7edfdbb..3ed419579d4 100755 --- a/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh +++ b/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh @@ -15,11 +15,11 @@ printf "* Installing vim - git - wget\n" apt-get install -y vim git wget printf "* Installing glfw - glew - osmesa part 1\n" -apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb +apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libx11-dev libegl-dev + +#printf "* Installing glfw - glew - osmesa part 2\n" +#apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0 -printf "* Installing glfw - glew - osmesa part 2\n" -apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0 libegl-dev libx11-dev -# if [ "${CU_VERSION:-}" == cpu ] ; then # solves version `GLIBCXX_3.4.29' not found for tensorboard # apt-get install -y gcc-4.9 diff --git a/test/test_storage_map.py b/test/test_storage_map.py index f42baf3a495..2083aca54b4 100644 --- a/test/test_storage_map.py +++ b/test/test_storage_map.py @@ -23,7 +23,7 @@ TensorDictMap, ) from torchrl.envs import GymEnv - +import sys if os.getenv("PYTORCH_TEST_FBCODE"): from pytorch.rl.test._utils_internal import PENDULUM_VERSIONED else: @@ -32,6 +32,7 @@ _has_gym = importlib.util.find_spec("gymnasium", None) or importlib.util.find_spec( "gym", None ) +IS_WIN = sys.platform == "win32" class TestHash: @@ -702,6 +703,7 @@ def test_forest_check_obs_match(self, intersect): ).all() prev_tree = subtree + @pytest.mark.skipif(IS_WIN, reason="fails with windows machines") def test_to_string(self): forest = MCTSForest() From 34028ca273015f1b618b69421bdf9b4af625ed11 Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Tue, 29 Apr 2025 13:11:41 +0100 Subject: [PATCH 7/8] Update [ghstack-poisoned] 
--- .github/unittest/linux_libs/scripts_gym/setup_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/unittest/linux_libs/scripts_gym/setup_env.sh b/.github/unittest/linux_libs/scripts_gym/setup_env.sh index 6c9d73defd6..25c493babee 100755 --- a/.github/unittest/linux_libs/scripts_gym/setup_env.sh +++ b/.github/unittest/linux_libs/scripts_gym/setup_env.sh @@ -12,7 +12,7 @@ this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" apt-get update && apt-get install -y git wget gcc g++ apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0 -apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev +apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev freeglut3-dev git config --global --add safe.directory '*' root_dir="$(git rev-parse --show-toplevel)" From babebb4f580a3140b41b6ab5358001fe3cc8aa5e Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Tue, 29 Apr 2025 13:26:59 +0100 Subject: [PATCH 8/8] Update [ghstack-poisoned] --- .../unittest/linux_olddeps/scripts_gym_0_13/environment.yml | 2 +- test/test_storage_map.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml b/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml index a79a009ad0c..61726cae9b7 100644 --- a/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml +++ b/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml @@ -2,7 +2,7 @@ channels: - pytorch - defaults dependencies: - - pip<25.0 + - pip - protobuf - pip: - hypothesis diff --git a/test/test_storage_map.py b/test/test_storage_map.py index 2083aca54b4..d8bd63db4ba 100644 --- a/test/test_storage_map.py +++ b/test/test_storage_map.py @@ -8,6 +8,7 @@ import functools import importlib.util import os +import sys import pytest @@ -23,7 +24,7 @@ TensorDictMap, ) from torchrl.envs import GymEnv 
-import sys + if os.getenv("PYTORCH_TEST_FBCODE"): from pytorch.rl.test._utils_internal import PENDULUM_VERSIONED else: