From 76213f79e533a0b54e3d4c2b0b03beb1f781a800 Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Mon, 5 Aug 2024 15:48:09 -0400 Subject: [PATCH] [BugFix, CI] Set `TD_GET_DEFAULTS_TO_NONE=1` in all CIs (#2363) --- .github/workflows/benchmarks.yml | 10 +++--- .github/workflows/benchmarks_pr.yml | 10 +++--- .github/workflows/test-linux-examples.yml | 1 + .github/workflows/test-linux-habitat.yml | 1 + .github/workflows/test-linux-libs.yml | 17 +++++++++ .github/workflows/test-linux-rlhf.yml | 1 + .github/workflows/test-linux.yml | 38 +++++++++++++++++++++ .github/workflows/test-windows-optdepts.yml | 1 + torchrl/data/tensor_specs.py | 5 ++- 9 files changed, 73 insertions(+), 11 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 8eaed2fb825..8008c8b5bbe 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -30,12 +30,13 @@ jobs: python-version: 3.8 - name: Setup Environment run: | - python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U - python -m pip install git+https://github.com/pytorch/tensordict - python setup.py develop - python -m pip install pytest pytest-benchmark + python3 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U + python3 -m pip install git+https://github.com/pytorch/tensordict + python3 setup.py develop + python3 -m pip install pytest pytest-benchmark python3 -m pip install "gym[accept-rom-license,atari]" python3 -m pip install dm_control + export TD_GET_DEFAULTS_TO_NONE=1 - name: Run benchmarks run: | cd benchmarks/ @@ -97,6 +98,7 @@ jobs: python3 -m pip install pytest pytest-benchmark python3 -m pip install "gym[accept-rom-license,atari]" python3 -m pip install dm_control + export TD_GET_DEFAULTS_TO_NONE=1 - name: check GPU presence run: | python -c """import torch diff --git a/.github/workflows/benchmarks_pr.yml b/.github/workflows/benchmarks_pr.yml index a8a1bc4c8dc..e994e860b9c 100644 --- a/.github/workflows/benchmarks_pr.yml +++ b/.github/workflows/benchmarks_pr.yml @@ -29,12 +29,13 @@ jobs: python-version: 3.8 - name: Setup Environment run: | - python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U - python -m pip install git+https://github.com/pytorch/tensordict - python setup.py develop - python -m pip install pytest pytest-benchmark + python3 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U + python3 -m pip install git+https://github.com/pytorch/tensordict + python3 setup.py develop + python3 -m pip install pytest pytest-benchmark python3 -m pip install "gym[accept-rom-license,atari]" python3 -m pip install dm_control + export TD_GET_DEFAULTS_TO_NONE=1 - name: Setup benchmarks run: | echo "BASE_SHA=$(echo ${{ github.event.pull_request.base.sha }} | cut -c1-8)" >> $GITHUB_ENV @@ -108,6 +109,7 @@ jobs: python3 -m pip install pytest pytest-benchmark python3 -m pip install "gym[accept-rom-license,atari]" python3 -m pip install dm_control + export TD_GET_DEFAULTS_TO_NONE=1 - name: check GPU presence run: | python -c """import torch diff --git a/.github/workflows/test-linux-examples.yml b/.github/workflows/test-linux-examples.yml index fd0adaf6ed5..39c97fae266 100644 --- a/.github/workflows/test-linux-examples.yml +++ b/.github/workflows/test-linux-examples.yml @@ -49,6 +49,7 @@ jobs: echo "PYTHON_VERSION: $PYTHON_VERSION" echo "CU_VERSION: $CU_VERSION" + export TD_GET_DEFAULTS_TO_NONE=1 ## setup_env.sh bash .github/unittest/linux_examples/scripts/run_all.sh diff --git a/.github/workflows/test-linux-habitat.yml b/.github/workflows/test-linux-habitat.yml index 3f6e89a70f9..6a1c52f90fa 100644 --- a/.github/workflows/test-linux-habitat.yml +++ b/.github/workflows/test-linux-habitat.yml @@ -46,5 +46,6 @@ jobs: export CU_VERSION="cu${CUDA_ARCH_VERSION:0:2}${CUDA_ARCH_VERSION:3:1}" # Remove the following line when the GPU tests are working inside docker, and uncomment the above lines #export CU_VERSION="cpu" + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_habitat/run_all.sh diff --git a/.github/workflows/test-linux-libs.yml b/.github/workflows/test-linux-libs.yml index 9e1875cac18..50fe0f29942 100644 --- a/.github/workflows/test-linux-libs.yml +++ b/.github/workflows/test-linux-libs.yml @@ -44,6 +44,7 @@ jobs: export TAR_OPTIONS="--no-same-owner" export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_ataridqn/setup_env.sh bash .github/unittest/linux_libs/scripts_ataridqn/install.sh @@ -81,6 +82,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 nvidia-smi @@ -114,6 +116,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_d4rl/setup_env.sh bash .github/unittest/linux_libs/scripts_d4rl/install.sh @@ -148,6 +151,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_d4rl/setup_env.sh bash .github/unittest/linux_libs/scripts_d4rl/install.sh @@ -181,6 +185,7 @@ jobs: export TAR_OPTIONS="--no-same-owner" export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_gen-dgrl/setup_env.sh bash .github/unittest/linux_libs/scripts_gen-dgrl/install.sh @@ -216,6 +221,7 @@ jobs: export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/work/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin" export TAR_OPTIONS="--no-same-owner" export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 ./.github/unittest/linux_libs/scripts_gym/setup_env.sh ./.github/unittest/linux_libs/scripts_gym/batch_scripts.sh @@ -251,6 +257,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 nvidia-smi @@ -285,6 +292,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 nvidia-smi @@ -321,6 +329,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_minari/setup_env.sh bash .github/unittest/linux_libs/scripts_minari/install.sh @@ -355,6 +364,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_openx/setup_env.sh bash .github/unittest/linux_libs/scripts_openx/install.sh @@ -387,6 +397,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 nvidia-smi @@ -423,6 +434,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_robohive/setup_env.sh bash .github/unittest/linux_libs/scripts_robohive/install_and_run_test.sh @@ -456,6 +468,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_roboset/setup_env.sh bash .github/unittest/linux_libs/scripts_roboset/install.sh @@ -491,6 +504,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_sklearn/setup_env.sh bash .github/unittest/linux_libs/scripts_sklearn/install.sh @@ -527,6 +541,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 nvidia-smi @@ -563,6 +578,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_vd4rl/setup_env.sh bash .github/unittest/linux_libs/scripts_vd4rl/install.sh @@ -599,6 +615,7 @@ jobs: export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 export BATCHED_PIPE_TIMEOUT=60 + export TD_GET_DEFAULTS_TO_NONE=1 nvidia-smi diff --git a/.github/workflows/test-linux-rlhf.yml b/.github/workflows/test-linux-rlhf.yml index 832d432c997..accbe6e7610 100644 --- a/.github/workflows/test-linux-rlhf.yml +++ b/.github/workflows/test-linux-rlhf.yml @@ -44,6 +44,7 @@ jobs: export TAR_OPTIONS="--no-same-owner" export UPLOAD_CHANNEL="nightly" export TF_CPP_MIN_LOG_LEVEL=0 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_libs/scripts_rlhf/setup_env.sh bash .github/unittest/linux_libs/scripts_rlhf/install.sh diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml index e8728180c67..3eafc93d0c8 100644 --- a/.github/workflows/test-linux.yml +++ b/.github/workflows/test-linux.yml @@ -38,6 +38,39 @@ jobs: export RELEASE=0 export TORCH_VERSION=nightly fi + export TD_GET_DEFAULTS_TO_NONE=1 + # Set env vars from matrix + export PYTHON_VERSION=${{ matrix.python_version }} + export CU_VERSION="cpu" + + echo "PYTHON_VERSION: $PYTHON_VERSION" + echo "CU_VERSION: $CU_VERSION" + + ## setup_env.sh + bash .github/unittest/linux/scripts/run_all.sh + + tests-cpu-oldget: + # Tests that TD_GET_DEFAULTS_TO_NONE=0 works fine as this will be the default for TD up to 0.7 + strategy: + matrix: + python_version: ["3.12"] + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + runner: linux.12xlarge + repository: pytorch/rl + docker-image: "nvidia/cuda:12.2.0-devel-ubuntu22.04" + timeout: 90 + script: | + if [[ "${{ github.ref }}" =~ release/* ]]; then + export RELEASE=1 + export TORCH_VERSION=stable + else + export RELEASE=0 + export TORCH_VERSION=nightly + fi + export TD_GET_DEFAULTS_TO_NONE=0 + # Set env vars from matrix export PYTHON_VERSION=${{ matrix.python_version }} export CU_VERSION="cpu" @@ -75,6 +108,8 @@ jobs: export RELEASE=0 export TORCH_VERSION=nightly fi + export TD_GET_DEFAULTS_TO_NONE=1 + # Remove the following line when the GPU tests are working inside docker, and uncomment the above lines #export CU_VERSION="cpu" @@ -110,6 +145,7 @@ jobs: export TORCH_VERSION=nightly fi export TF_CPP_MIN_LOG_LEVEL=0 + export TD_GET_DEFAULTS_TO_NONE=1 bash .github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh @@ -149,6 +185,7 @@ jobs: echo "PYTHON_VERSION: $PYTHON_VERSION" echo "CU_VERSION: $CU_VERSION" + export TD_GET_DEFAULTS_TO_NONE=1 ## setup_env.sh bash .github/unittest/linux_optdeps/scripts/run_all.sh @@ -187,6 +224,7 @@ jobs: echo "PYTHON_VERSION: $PYTHON_VERSION" echo "CU_VERSION: $CU_VERSION" + export TD_GET_DEFAULTS_TO_NONE=1 ## setup_env.sh bash .github/unittest/linux/scripts/run_all.sh diff --git a/.github/workflows/test-windows-optdepts.yml b/.github/workflows/test-windows-optdepts.yml index e98b6c1810e..14a8dd7ab13 100644 --- a/.github/workflows/test-windows-optdepts.yml +++ b/.github/workflows/test-windows-optdepts.yml @@ -42,6 +42,7 @@ jobs: export RELEASE=0 export TORCH_VERSION=nightly fi + export TD_GET_DEFAULTS_TO_NONE=1 ## setup_env.sh ./.github/unittest/windows_optdepts/scripts/setup_env.sh diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py index 7f94ae80aeb..c1a6d831115 100644 --- a/torchrl/data/tensor_specs.py +++ b/torchrl/data/tensor_specs.py @@ -38,6 +38,7 @@ TensorDictBase, unravel_key, ) +from tensordict.base import NO_DEFAULT from tensordict.utils import _getitem_batch_size, NestedKey from torchrl._utils import _make_ordinal_device, get_binary_env_var @@ -79,8 +80,6 @@ " an issue at https://github.com/pytorch/rl/issues" ) -NO_DEFAULT = object() - def _default_dtype_and_device( dtype: Union[None, torch.dtype], @@ -4121,7 +4120,7 @@ def is_in(self, val: Union[dict, TensorDictBase]) -> bool: for key, item in self._specs.items(): if item is None or (isinstance(item, CompositeSpec) and item.is_empty()): continue - val_item = val[key] + val_item = val.get(key, NO_DEFAULT) if not item.is_in(val_item): return False return True