amend

pytorch · vmoens · Mar 19, 2024 · Mar 18, 2024 · Mar 18, 2024 · Mar 18, 2024
commit 504a55f3812a4f7e75f154398ff32165cb0bc468
diff --git a/test/test_tensordictmodules.py b/test/test_tensordictmodules.py
@@ -1503,6 +1503,10 @@ def test_batched_actor_exceptions(self):
         with pytest.raises(RuntimeError, match="Cannot initialize the wrapper"):
             env.rollout(10, actor, tensordict=td, auto_reset=False)
 
+        actor = BatchedActionWrapper(actor_base, n_steps=time_steps - 1)
+        with pytest.raises(RuntimeError, match="The action's time dimension"):
+            env.rollout(10, actor)
+
     @pytest.mark.parametrize("time_steps", [3, 5])
     def test_batched_actor_simple(self, time_steps):
 

diff --git a/torchrl/modules/tensordict_module/actors.py b/torchrl/modules/tensordict_module/actors.py
@@ -2127,12 +2127,24 @@ class BatchedActionWrapper(TensorDictModuleBase):
     because a "done" state has been encountered. Unlike ``action_keys``,
     this key must be unique.
 
+    Args:
+        actor (TensorDictModuleBase): An actor.
+        n_steps (int): the number of actions the actor outputs at once
+            (lookahead window).
+
+    Keyword Args:
+        action_keys (list of NestedKeys, optional): the action keys from
+            the environment. Can be retrieved from ``env.action_keys``.
+            Defaults to all ``out_keys`` of the ``actor`` which end
+            with the ``"action"`` string.
+        init_key (NestedKey, optional): the unique key of the entry indicating a re-initialization after a "done" state.
     """
 
     def __init__(
         self,
-        actor: TensorDictModule,
+        actor: TensorDictModuleBase,
         n_steps: int,
+        *,
         action_keys: List[NestedKey] | None = None,
         init_key: List[NestedKey] | None = None,
     ):
@@ -2210,6 +2222,11 @@ def forward(
                 action_entry = parent_td.get(action_key_orig[-1], None)
             if action_entry is None:
                 raise self._NO_INIT_ERR
+            if action_entry.shape[parent_td.ndim] != self.n_steps:
+                raise RuntimeError(
+                    f"The action's time dimension (dim={parent_td.ndim}) doesn't match the n_steps argument ({self.n_steps}). "
+                    f"The action shape was {action_entry.shape}."
+                )
             base_idx = (
                 slice(
                     None,