amend

pytorch · vmoens · Feb 24, 2024 · Feb 24, 2024 · Feb 24, 2024 · Feb 24, 2024
commit c16d871194f478c5e9003b85d4db3fe22bbb2984
diff --git a/test/test_rb.py b/test/test_rb.py
@@ -34,7 +34,8 @@
 from torchrl.collectors import RandomPolicy, SyncDataCollector
 from torchrl.collectors.utils import split_trajectories
 from torchrl.data import (
-    PrioritizedReplayBuffer,MultiStep,
+    MultiStep,
+    PrioritizedReplayBuffer,
     RemoteTensorDictReplayBuffer,
     ReplayBuffer,
     ReplayBufferEnsemble,
@@ -2560,6 +2561,13 @@ def test_rb_multidim(self, datatype, datadim, rbtype, storage_cls):
         rb = rbtype(storage=storage_cls(100, ndim=datadim), batch_size=4)
         rb.extend(data)
         assert len(rb) == 12
+        data = rb[:]
+        if datatype in ("tensordict", "tensorclass"):
+            assert data.numel() == 12
+        else:
+            assert all(
+                leaf.shape[:datadim].numel() == 12 for leaf in tree_flatten(data)[0]
+            )
         s = rb.sample()
         if datatype in ("tensordict", "tensorclass"):
             assert (s.exclude("index") == 1).all()
@@ -2605,10 +2613,15 @@ def test_rb_multidim(self, datatype, datadim, rbtype, storage_cls):
         "transform",
         [
             None,
-            [lambda: split_trajectories, functools.partial(MultiStep, gamma=0.9, n_steps=3)],
+            [
+                lambda: split_trajectories,
+                functools.partial(MultiStep, gamma=0.9, n_steps=3),
+            ],
         ],
     )
-    def test_rb_multidim_collector(self, rbtype, storage_cls, writer_cls, sampler_cls, transform):
+    def test_rb_multidim_collector(
+        self, rbtype, storage_cls, writer_cls, sampler_cls, transform
+    ):
         from _utils_internal import CARTPOLE_VERSIONED
 
         torch.manual_seed(0)
@@ -2652,6 +2665,7 @@ def test_rb_multidim_collector(self, rbtype, storage_cls, writer_cls, sampler_cl
             if transform is not None:
                 assert s.ndim == 2
 
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
diff --git a/torchrl/data/replay_buffers/replay_buffers.py b/torchrl/data/replay_buffers/replay_buffers.py
@@ -5,6 +5,7 @@
 from __future__ import annotations
 
 import collections
+import contextlib
 import json
 import textwrap
 import threading
@@ -21,7 +22,6 @@
     is_tensor_collection,
     is_tensorclass,
     LazyStackedTensorDict,
-    TensorDict,
     TensorDictBase,
     unravel_key,
 )
@@ -350,14 +350,10 @@ def __getitem__(self, index: Union[int, torch.Tensor]) -> Any:
             data = self._collate_fn(data)
 
         if self._transform is not None and len(self._transform):
-            is_td = True
-            if not is_tensor_collection(data):
-                data = TensorDict({"data": data}, [])
-                is_td = False
-            with data.unlock_():
+            with data.unlock_() if is_tensor_collection(
+                data
+            ) else contextlib.nullcontext():
                 data = self._transform(data)
-            if not is_td:
-                data = data["data"]
 
         return data
 
@@ -1067,11 +1063,13 @@ def sample(
                         val = expand_as_right(val, data)
                     data.set(key, val)
                 except RuntimeError:
-                    raise RuntimeError("Failed to set the metadata (e.g., indices or weights) in the sampled tensordict within TensorDictReplayBuffer.sample. "
-                                       "This is probably caused by a shape mismatch (one of the transforms has proably modified "
-                                       "the shape of the output tensordict). "
-                                       "You can always recover these items from the `sample` method from a regular ReplayBuffer "
-                                       "instance with the 'return_info' flag set to True." )
+                    raise RuntimeError(
+                        "Failed to set the metadata (e.g., indices or weights) in the sampled tensordict within TensorDictReplayBuffer.sample. "
+                        "This is probably caused by a shape mismatch (one of the transforms has proably modified "
+                        "the shape of the output tensordict). "
+                        "You can always recover these items from the `sample` method from a regular ReplayBuffer "
+                        "instance with the 'return_info' flag set to True."
+                    )
             if is_locked:
                 data.lock_()
         elif not is_tc and include_info in (True, None):

diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
@@ -7067,6 +7067,22 @@ def _inv_call(self, tensordict: TensorDictBase) -> TensorDictBase:
     def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
+        """Swap dimension ``dim_extend`` with dimension 0 of the input.
+
+        Pytrees that are not tensor collections are transposed leaf by leaf, and
+        only if every leaf has the same leading ``dim_extend + 1`` dimensions.
+        """
         # if the tensordict number of dimension is greater than one we flip
         # the dimensions
-        if tensordict.ndim > 1:
+        if is_tensor_collection(tensordict) and tensordict.ndim > 1:
             tensordict = tensordict.transpose(self.dim_extend, 0)
+        elif not is_tensor_collection(tensordict):
+            # leaves sharing the same leading dims are assumed to be batched
+            leaves, spec = torch.utils._pytree.tree_flatten(tensordict)
+            n_lead = self.dim_extend + 1
+            lead = leaves[0].shape[:n_lead] if leaves else ()
+            batched = 0 < n_lead == len(lead) and all(
+                leaf.shape[:n_lead] == lead for leaf in leaves
+            )
+            if batched:
+                leaves = [leaf.transpose(self.dim_extend, 0) for leaf in leaves]
+                tensordict = torch.utils._pytree.tree_unflatten(leaves, spec)
         return tensordict