amend

pytorch · vmoens · Apr 30, 2024 · Apr 29, 2024 · Apr 29, 2024 · Apr 29, 2024
commit a9c6be6be95d8ccd3acadcf7f899165ad2fed2f1
diff --git a/test/test_libs.py b/test/test_libs.py
@@ -1414,14 +1414,20 @@ def test_jumanji_consistency(self, envname, batch_size):
                 t2 = torch.tensor(onp.asarray(t2)).view_as(t1)
                 torch.testing.assert_close(t1, t2)
 
-    def test_jumanji_rendering(self, envname):
+    @pytest.mark.parametrize("batch_size", [[3], []])
+    def test_jumanji_rendering(self, envname, batch_size):
         # check that this works with a batch-size
-        env = JumanjiEnv(envname, from_pixels=True, batch_size=[3])
+        env = JumanjiEnv(envname, from_pixels=True, batch_size=batch_size)
         env.set_seed(0)
+        env.transform.transform_observation_spec(env.base_env.observation_spec)
         check_env_specs(env)
         r = env.rollout(10)
-        assert r["pixels"].unique().numel() > 1
-        assert r["pixels"].dtype == torch.uint8
+        # with a batch size, make_render asks for non-tensor numpy pixels: convert first
+        pixels = r["pixels"]
+        if not isinstance(pixels, torch.Tensor):
+            pixels = torch.as_tensor(np.asarray(pixels))
+        assert pixels.unique().numel() > 1
+        assert pixels.dtype == torch.uint8
 
 
 ENVPOOL_CLASSIC_CONTROL_ENVS = [

diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py
@@ -1944,13 +1944,13 @@ def clone(self) -> NonTensorSpec:
         return self.__class__(shape=self.shape, device=self.device, dtype=self.dtype)
 
     def rand(self, shape):
-        return NonTensorData(data=None, shape=self.shape, device=self.device)
+        return NonTensorData(data=None, batch_size=self.shape, device=self.device)
 
     def zero(self, shape):
-        return NonTensorData(data=None, shape=self.shape, device=self.device)
+        return NonTensorData(data=None, batch_size=self.shape, device=self.device)
 
     def one(self, shape):
-        return NonTensorData(data=None, shape=self.shape, device=self.device)
+        return NonTensorData(data=None, batch_size=self.shape, device=self.device)
 
     def is_in(self, val: torch.Tensor) -> bool:
         shape = torch.broadcast_shapes(self.shape, val.shape)

diff --git a/torchrl/envs/libs/jumanji.py b/torchrl/envs/libs/jumanji.py
@@ -390,7 +390,12 @@ def make_render(self):
         from torchrl.record import PixelRenderTransform
 
         return self.append_transform(
-            PixelRenderTransform(out_keys=["pixels"], pass_tensordict=True)
+            PixelRenderTransform(
+                out_keys=["pixels"],
+                pass_tensordict=True,
+                as_non_tensor=bool(self.batch_size),
+                as_numpy=bool(self.batch_size),
+            )
         )
 
     def _make_state_example(self, env):
@@ -502,7 +507,13 @@ def read_obs(self, obs):
             obs_dict = _object_to_tensordict(obs, self.device, self.batch_size)
         return super().read_obs(obs_dict)
 
-    def render(self, tensordict, matplotlib_backend: str | None = None, **kwargs):
+    def render(
+        self,
+        tensordict,
+        matplotlib_backend: str | None = None,
+        as_numpy: bool = False,
+        **kwargs,
+    ):
         """Renders the environment output given an input tensordict.
 
         This method is intended to be called by the :class:`~torchrl.record.PixelRenderTransform`
@@ -517,9 +528,17 @@ def render(self, tensordict, matplotlib_backend: str | None = None, **kwargs):
 
         This pipeline will write a `"pixels"` entry in your output tensordict.
 
+        Args:
+            tensordict (TensorDictBase): a tensordict containing a state to represent
+            matplotlib_backend (str, optional): the matplotlib backend
+            as_numpy (bool, optional): if ``True``, the image is returned as an np.ndarray;
+                if ``False``, as a torch.Tensor. Defaults to ``False``.
+
         """
         import io
 
+        import jax
+        import jax.numpy as jnp
         import jumanji
         import matplotlib
         import matplotlib.pyplot as plt
@@ -529,8 +548,12 @@ def render(self, tensordict, matplotlib_backend: str | None = None, **kwargs):
             matplotlib.use(matplotlib_backend)
 
         # Get only one env
+        _state_example = self._state_example
         while tensordict.ndim:
             tensordict = tensordict[0]
+            _state_example = jax.tree_util.tree_map(
+                lambda x: jnp.take(x, 0, axis=0), _state_example
+            )
         # Patch jumanji is_notebook
         is_notebook = jumanji.environments.is_notebook
         try:
@@ -539,18 +562,19 @@ def render(self, tensordict, matplotlib_backend: str | None = None, **kwargs):
             isinteractive = plt.isinteractive()
             plt.ion()
             buf = io.BytesIO()
-            state = _tensordict_to_object(tensordict.get("state"), self._state_example)
+            state = _tensordict_to_object(tensordict.get("state"), _state_example)
             self._env.render(state, **kwargs)
             plt.savefig(buf, format="png")
             buf.seek(0)
             # Load the image into a PIL object.
             img = PIL.Image.open(buf)
-            # Convert the PIL image into a np.ndarray.
             img_array = torchvision.transforms.v2.functional.pil_to_tensor(img)
             if not isinteractive:
                 plt.ioff()
             plt.close()
-            return img_array[:3]
+            if not as_numpy:
+                return img_array[:3]
+            return img_array[:3].numpy()
         finally:
             jumanji.environments.is_notebook = is_notebook