Merge remote-tracking branch 'origin/main' into fix-env-nontensor

# Conflicts:
#	test/mocking_classes.py
#	test/test_env.py
#	torchrl/data/tensor_specs.py
#	torchrl/envs/batched_envs.py
#	torchrl/envs/utils.py
pytorch · vmoens · Jun 19, 2024 · Feb 21, 2024 · Feb 21, 2024 · Jun 11, 2024
commit 78d910f3b9c85de824c0ba6d7d3c75dd09385bf7
diff --git a/test/mocking_classes.py b/test/mocking_classes.py
@@ -1854,3 +1854,130 @@ def _step(
 
     def _set_seed(self, seed: Optional[int]):
         return seed
+
+
+class AutoResettingCountingEnv(CountingEnv):
+    def _step(self, tensordict):
+        tensordict = super()._step(tensordict)
+        if tensordict["done"].any():
+            td_reset = super().reset()
+            tensordict.update(td_reset.exclude(*self.done_keys))
+        return tensordict
+
+    def _reset(self, tensordict=None):
+        if tensordict is not None and "_reset" in tensordict:
+            raise RuntimeError
+        return super()._reset(tensordict)
+
+
+class AutoResetHeteroCountingEnv(HeterogeneousCountingEnv):
+    """Heterogeneous counting env that zeroes its own counters inside ``_step``.
+
+    A per-index counter buffer is incremented from the nested ``"lazy"``
+    actions; entries whose count exceeds ``max_steps`` are zeroed during the
+    same step. ``_reset`` raises if it receives a tensordict containing the
+    first reset key.
+
+    Args:
+        max_steps: a counter strictly greater than this value is flagged done.
+        start_val: value the counters are initialised with and restored to by
+            ``_reset``.
+        **kwargs: forwarded to the parent constructor.
+    """
+
+    def __init__(self, max_steps: int = 5, start_val: int = 0, **kwargs):
+        super().__init__(**kwargs)
+        self.n_nested_dim = 3
+        self.max_steps = max_steps
+        self.start_val = start_val
+
+        # One integer counter per nested index, with a trailing singleton dim.
+        count = torch.zeros(
+            (*self.batch_size, self.n_nested_dim, 1),
+            device=self.device,
+            dtype=torch.int,
+        )
+        count[:] = self.start_val
+
+        self.register_buffer("count", count)
+        # NOTE(review): _make_specs is defined outside this view; presumably it
+        # rebuilds the specs now that n_nested_dim is set - confirm.
+        self._make_specs()
+
+    def _step(self, tensordict):
+        """Advance the counters from the nested actions and build the step output.
+
+        Returns a tensordict initialised from ``observation_spec.zero()`` and
+        then updated with zeroed done and reward specs.
+        """
+        # The first element of each nested action's last dim, cast to bool,
+        # is added to the matching counter.
+        for i in range(self.n_nested_dim):
+            action = tensordict["lazy"][..., i]["action"]
+            action = action[..., 0].to(torch.bool)
+            self.count[..., i, 0] += action
+
+        td = self.observation_spec.zero()
+        for done_key in self.done_keys:
+            td[done_key] = self.count > self.max_steps
+
+        # _terminated_or_truncated is defined outside this view; its result is
+        # used here only as a truth value.
+        any_done = _terminated_or_truncated(
+            td,
+            full_done_spec=self.output_spec["full_done_spec"],
+            key=None,
+        )
+        if any_done:
+            # Auto-reset: zero the counters selected by the ("lazy", "done") mask.
+            self.count[td["lazy", "done"]] = 0
+
+        # Add each index's count, expanded to the entry's shape, onto every
+        # ("lazy", ...) observation entry of that index's slice.
+        # NOTE(review): the in-place add targets the *input* ``tensordict``,
+        # not ``td``; whether it is meant to reach the returned data is not
+        # visible here - confirm.
+        for i in range(self.n_nested_dim):
+            lazy = tensordict["lazy"][..., i]
+            for obskey in self.observation_spec.keys(True, True):
+                if isinstance(obskey, tuple) and obskey[0] == "lazy":
+                    lazy[obskey[1:]] += expand_right(
+                        self.count[..., i, 0], lazy[obskey[1:]].shape
+                    ).clone()
+        # NOTE(review): if full_done_spec covers the done keys written above,
+        # this update replaces those flags with zeros - confirm intended.
+        td.update(self.full_done_spec.zero())
+        td.update(self.full_reward_spec.zero())
+
+        assert td.batch_size == self.batch_size
+        return td
+
+    def _reset(self, tensordict=None):
+        """Restore every counter to ``start_val`` and return zeroed obs/done data.
+
+        Raises:
+            RuntimeError: if ``tensordict`` is given and the first reset key is
+                among its keys (``keys(True)`` lookup).
+        """
+        if tensordict is not None and self.reset_keys[0] in tensordict.keys(True):
+            raise RuntimeError
+        self.count[:] = self.start_val
+
+        reset_td = self.observation_spec.zero()
+        reset_td.update(self.full_done_spec.zero())
+        assert reset_td.batch_size == self.batch_size
+        return reset_td
+
+
+class EnvWithDynamicSpec(EnvBase):
+    def __init__(self, max_count=5):
+        super().__init__(batch_size=())
+        self.observation_spec = CompositeSpec(
+            observation=UnboundedContinuousTensorSpec(shape=(3, -1, 2)),
+        )
+        self.action_spec = BoundedTensorSpec(low=-1, high=1, shape=(2,))
+        self.full_done_spec = CompositeSpec(
+            done=BinaryDiscreteTensorSpec(1, shape=(1,), dtype=torch.bool),
+            terminated=BinaryDiscreteTensorSpec(1, shape=(1,), dtype=torch.bool),
+            truncated=BinaryDiscreteTensorSpec(1, shape=(1,), dtype=torch.bool),
+        )
+        self.reward_spec = UnboundedContinuousTensorSpec((1,), dtype=torch.float)
+        self.count = 0
+        self.max_count = max_count
+
+    def _reset(self, tensordict=None):
+        self.count = 0
+        data = TensorDict(
+            {
+                "observation": torch.full(
+                    (3, self.count + 1, 2),
+                    self.count,
+                    dtype=self.observation_spec["observation"].dtype,
+                )
+            }
+        )
+        data.update(self.done_spec.zero())
+        return data
+
+    def _step(
+        self,
+        tensordict: TensorDictBase,
+    ) -> TensorDictBase:
+        self.count += 1
+        done = self.count >= self.max_count
+        observation = TensorDict(
+            {
+                "observation": torch.full(
+                    (3, self.count + 1, 2),
+                    self.count,
+                    dtype=self.observation_spec["observation"].dtype,
+                )
+            }
+        )
+        done = self.full_done_spec.zero() | done
+        reward = self.full_reward_spec.zero()
+        return observation.update(done).update(reward)
+
+    def _set_seed(self, seed: Optional[int]):
+        self.manual_seed = seed
+        return seed