
[Feature] Allow usage of a different device on main and sub-envs in ParallelEnv and SerialEnv #1626

Merged
merged 71 commits on Nov 30, 2023
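
As a quick illustration of the feature named in the title, here is a hypothetical usage sketch (not taken from the PR) of a ParallelEnv whose own device differs from the one its sub-envs run on. It assumes GymEnv and ParallelEnv accept a `device` argument as they do in torchrl, that a CUDA device is available, and that gymnasium's "Pendulum-v1" is installed.

import torch
from torchrl.envs import GymEnv, ParallelEnv

def make_env():
    # each worker env is created on CPU
    return GymEnv("Pendulum-v1", device="cpu")

if torch.cuda.is_available():
    # the batched env itself sits on cuda:0; inputs and outputs are cast
    # to that device, independently of the workers' device
    env = ParallelEnv(2, make_env, device="cuda:0")
    rollout = env.rollout(3)
    print(rollout.device)  # expected: cuda:0
    env.close()
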
Changes from 1 commit
Commits
71 commits
1f539dd  init (vmoens, Oct 6, 2023)
d4c16e1  amend (vmoens, Oct 6, 2023)
d2321aa  amend (vmoens, Oct 6, 2023)
565115a  amend (vmoens, Oct 6, 2023)
3c46136  amend (vmoens, Oct 6, 2023)
78cfa41  amend (vmoens, Oct 6, 2023)
a6bd8eb  amend (vmoens, Oct 6, 2023)
04d4ae7  amend (vmoens, Oct 6, 2023)
3e31963  Merge remote-tracking branch 'origin/main' into step_maybe_reset (vmoens, Oct 10, 2023)
f1b0ea4  tensordict_ (vmoens, Oct 10, 2023)
16b3538  amend rollout logic (vmoens, Oct 10, 2023)
7ad0864  amend (vmoens, Oct 10, 2023)
bcac398  amend (vmoens, Oct 10, 2023)
428f8ee  inference (vmoens, Oct 10, 2023)
6fbd0bd  cpu -> cuda (vmoens, Oct 10, 2023)
02db623  checks (vmoens, Oct 10, 2023)
08a8f47  using pipe instead of event (vmoens, Oct 10, 2023)
45e64f7  amend (vmoens, Oct 10, 2023)
7dd4821  amend (vmoens, Oct 10, 2023)
e1a2206  rm cuda event (vmoens, Oct 10, 2023)
dc2caab  amend (vmoens, Oct 10, 2023)
01ffbf9  amend (vmoens, Oct 10, 2023)
ac76ec3  amend (vmoens, Oct 10, 2023)
ceab010  amend (vmoens, Oct 10, 2023)
f0327c9  amend (vmoens, Oct 10, 2023)
518b3d1  amend (vmoens, Oct 10, 2023)
47dd93b  amend (vmoens, Oct 10, 2023)
354fb6f  amend (vmoens, Oct 10, 2023)
53d5f9a  amend (vmoens, Oct 10, 2023)
78c00e8  amend (vmoens, Oct 10, 2023)
f63480e  amend (vmoens, Oct 10, 2023)
9a3631f  amend (vmoens, Oct 10, 2023)
2ceb438  amend (vmoens, Oct 10, 2023)
6ecebda  amend (vmoens, Oct 10, 2023)
5c613c3  amend (vmoens, Oct 10, 2023)
9f97e58  amend (vmoens, Oct 10, 2023)
9cbcbb0  amend (vmoens, Oct 10, 2023)
72c4163  amend (vmoens, Oct 10, 2023)
6f4c374  amend (vmoens, Oct 10, 2023)
3657b41  empty (vmoens, Oct 11, 2023)
9206b93  amend (vmoens, Oct 11, 2023)
897123f  Merge remote-tracking branch 'origin/main' into parallel_cuda_refactor (vmoens, Nov 10, 2023)
2bac78c  amend (vmoens, Nov 10, 2023)
51a8856  amend (vmoens, Nov 10, 2023)
7a300f5  amend (vmoens, Nov 10, 2023)
b25921d  amend (vmoens, Nov 10, 2023)
d42348d  amend (vmoens, Nov 10, 2023)
f3421aa  amend (vmoens, Nov 10, 2023)
939ece4  amend (vmoens, Nov 10, 2023)
082ba9a  amend (vmoens, Nov 10, 2023)
4fd670f  amend (vmoens, Nov 10, 2023)
2a773f3  amend (vmoens, Nov 10, 2023)
e7cb5dd  Merge remote-tracking branch 'origin/main' into parallel_cuda_refactor (vmoens, Nov 27, 2023)
ea88bb4  Merge remote-tracking branch 'origin/main' into parallel_cuda_refactor (vmoens, Nov 27, 2023)
7c04f62  amend (vmoens, Nov 27, 2023)
b422775  amend (vmoens, Nov 27, 2023)
ed0287d  amend (vmoens, Nov 27, 2023)
05b0c08  amend (vmoens, Nov 27, 2023)
33de0fc  amend (vmoens, Nov 28, 2023)
492a884  amend (vmoens, Nov 28, 2023)
ff4799d  amend (vmoens, Nov 28, 2023)
f25b957  amend (vmoens, Nov 28, 2023)
8899dbd  amend (vmoens, Nov 28, 2023)
65c9deb  amend (vmoens, Nov 28, 2023)
77c2d6b  amend (vmoens, Nov 28, 2023)
7928744  amend (vmoens, Nov 28, 2023)
e7fda36  amend (vmoens, Nov 29, 2023)
fb9a03a  amend (vmoens, Nov 29, 2023)
d73ca22  amend (vmoens, Nov 29, 2023)
c4d4c6b  amend (vmoens, Nov 29, 2023)
b2840b0  doc (vmoens, Nov 29, 2023)
Viewing changes from commit 1f539dd4f7a10c7c8ee67d5dc1d6eb60ad6c49f0 ("init")
vmoens committed Oct 6, 2023
9 changes: 1 addition & 8 deletions torchrl/collectors/collectors.py
@@ -794,16 +794,9 @@ def _step_and_maybe_reset(self) -> None:
traj_ids = traj_ids.clone()
# collectors do not support passing other tensors than `"_reset"`
# to `reset()`.
traj_sop = _aggregate_resets(td_reset, reset_keys=self.env.reset_keys)
td_reset = self.env.reset(td_reset)

if td_reset.batch_dims:
# better cloning here than when passing the td for stacking
# cloning is necessary to avoid modifying entries in-place
self._tensordict = torch.where(traj_sop, td_reset, self._tensordict)
else:
self._tensordict.update(td_reset)

traj_sop = _aggregate_resets(td_reset, reset_keys=self.env.reset_keys)
traj_ids[traj_sop] = traj_ids.max() + torch.arange(
1, traj_sop.sum() + 1, device=traj_ids.device
)
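
Aside (not part of the diff): the trajectory-id renumbering kept above gives every freshly reset worker a new id above the current maximum. A standalone sketch of the same arithmetic:

import torch

traj_ids = torch.tensor([0, 1, 2, 3])
traj_sop = torch.tensor([False, True, False, True])  # workers 1 and 3 just reset
# same renumbering as in _step_and_maybe_reset above
traj_ids[traj_sop] = traj_ids.max() + torch.arange(1, traj_sop.sum() + 1)
print(traj_ids)  # tensor([0, 4, 2, 5])
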
39 changes: 31 additions & 8 deletions torchrl/envs/common.py
@@ -7,7 +7,7 @@

import abc
from copy import deepcopy
from typing import Any, Callable, Dict, Iterator, List, Optional, Union
from typing import Any, Callable, Dict, Iterator, List, Optional, Union, Tuple

import numpy as np
import torch
@@ -26,6 +26,7 @@
from torchrl.data.utils import DEVICE_TYPING
from torchrl.envs.utils import (
_replace_last,
_update_during_reset,
get_available_libraries,
step_mdp,
terminated_or_truncated,
@@ -1535,13 +1536,11 @@ def reset(
raise RuntimeError(
f"Env done entry '{done_key}' was (partially) True after a call to reset(). This is not allowed."
)

if tensordict is not None:
tensordict.update(tensordict_reset)
else:
tensordict = tensordict_reset
tensordict.exclude(*self.reset_keys, inplace=True)
return tensordict
return (
_update_during_reset(tensordict_reset, tensordict, self.reset_keys)
if tensordict is not None
else tensordict_reset
)

def numel(self) -> int:
return prod(self.batch_size)
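
With this change, reset() no longer unconditionally copies the reset output over the input: when the input tensordict carries a partial "_reset" mask, _update_during_reset (added in torchrl/envs/utils.py below) merges only the sub-envs that actually reset and keeps the other entries from the input. A hedged call-site sketch (not from the PR), assuming gymnasium's "Pendulum-v1" is installed:

import torch
from torchrl.envs import GymEnv, SerialEnv

env = SerialEnv(2, lambda: GymEnv("Pendulum-v1"))
td = env.reset()                                 # full reset of both sub-envs
td["_reset"] = torch.tensor([[True], [False]])   # ask to reset sub-env 0 only
td_out = env.reset(td)
# td_out holds a fresh observation for sub-env 0, while sub-env 1 keeps the
# entries it already had in `td`; the "_reset" flag itself is consumed.
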
@@ -1836,6 +1835,30 @@ def policy(td):
out_td.refine_names(..., "time")
return out_td

def step_and_maybe_reset(
self, tensordict: TensorDictBase
) -> Tuple[TensorDictBase, TensorDictBase]:
tensordict = self.step(tensordict)
tensordict_ = step_mdp(
tensordict,
keep_other=True,
exclude_action=False,
exclude_reward=True,
reward_keys=self.reward_keys,
action_keys=self.action_keys,
done_keys=self.done_keys,
)
# done and truncated are in done_keys
# We read if any key is done.
any_done = terminated_or_truncated(
tensordict,
full_done_spec=self.output_spec["full_done_spec"],
key="_reset",
)
if any_done:
tensordict_ = self.reset(tensordict_)
return tensordict, tensordict_

@property
def reset_keys(self) -> List[NestedKey]:
"""Returns a list of reset keys.
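
Usage-wise, step_and_maybe_reset returns both the post-step tensordict (with its "next" entries, ready for storage) and the tensordict to feed into the next step, already reset wherever a done flag was raised. A hedged sketch of the intended collection loop (not from the PR), again assuming "Pendulum-v1" is installed:

import torch
from torchrl.envs import GymEnv

env = GymEnv("Pendulum-v1")
td = env.reset()
steps = []
for _ in range(10):
    td["action"] = env.action_spec.rand()
    td, td_next = env.step_and_maybe_reset(td)
    steps.append(td)   # full transition, including the "next" entries
    td = td_next       # root for the next iteration, reset if an episode ended
data = torch.stack(steps)  # a [10]-shaped tensordict of transitions
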
36 changes: 36 additions & 0 deletions torchrl/envs/utils.py
@@ -899,3 +899,39 @@ def skim_through(td, reset=reset):

reset = skim_through(data)
return reset


def _update_during_reset(
tensordict_reset: TensorDictBase,
tensordict: TensorDictBase,
reset_keys: List[NestedKey],
):
for reset_key in reset_keys:
# get the node of the reset key
if isinstance(reset_key, tuple):
# the reset key *must* have gone through unravel_key
# we don't test it to avoid induced overhead
node_key = reset_key[:-1]
node_reset = tensordict_reset.get(node_key)
node = tensordict.get(node_key)
else:
node_reset = tensordict_reset
node = tensordict
# get the reset signal
reset = tensordict.pop(reset_key, None)
if reset is None or reset.all():
# perform simple update, at a single level.
# by contract, a reset signal at one level cannot
# be followed by other resets at nested levels, so it's safe to
# simply update
node.update(node_reset)
else:
# there can be two cases: (1) the key is present in both tds,
# in which case we use the reset mask to update
# (2) the key is not present in the input tensordict, in which
# case we just return the data

# empty tensordicts won't be returned
reset = reset.reshape(node)
node.where(reset, node_reset, out=node, pad=0)
return tensordict
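
A standalone sketch (not from the diff) of what _update_during_reset does to plain tensordicts: entries are taken from the reset output where the popped "_reset" mask is True and kept from the running tensordict elsewhere. It is written against the helper as it ships in torchrl; the exact masking line was still being amended in later commits of this PR.

import torch
from tensordict import TensorDict
from torchrl.envs.utils import _update_during_reset

tensordict = TensorDict(
    {"obs": torch.tensor([1.0, 2.0]), "_reset": torch.tensor([True, False])},
    batch_size=[2],
)
tensordict_reset = TensorDict({"obs": torch.tensor([10.0, 20.0])}, batch_size=[2])
out = _update_during_reset(tensordict_reset, tensordict, reset_keys=["_reset"])
print(out["obs"])                  # expected: tensor([10., 2.]); only entry 0 was reset
assert "_reset" not in out.keys()  # the reset flag has been consumed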