From 04d8b52e1d5f62da231ce13ee1e01a9bf938d5f4 Mon Sep 17 00:00:00 2001
From: Vincent Moens <vmoens@meta.com>
Date: Mon, 22 Jul 2024 14:47:01 +0100
Subject: [PATCH] [Feature] PendulumEnv

ghstack-source-id: c7fb8a5625673250bc71e2bfe074c1b3e3258160
Pull Request resolved: https://github.com/pytorch/rl/pull/2302
---
 docs/source/reference/envs.rst         |   1 +
 test/test_env.py                       |  15 +-
 torchrl/envs/__init__.py               |   2 +-
 torchrl/envs/custom/__init__.py        |   1 +
 torchrl/envs/custom/pendulum.py        | 323 +++++++++++++++++++++++++
 torchrl/envs/utils.py                  |   8 +-
 tutorials/sphinx-tutorials/pendulum.py |   2 +
 7 files changed, 348 insertions(+), 4 deletions(-)
 create mode 100644 torchrl/envs/custom/pendulum.py

diff --git a/docs/source/reference/envs.rst b/docs/source/reference/envs.rst
index 023d93738cd..e9f6f21c644 100644
--- a/docs/source/reference/envs.rst
+++ b/docs/source/reference/envs.rst
@@ -345,6 +345,7 @@ TorchRL offers a series of custom built-in environments.
     :toctree: generated/
     :template: rl_template.rst
 
+    PendulumEnv
     TicTacToeEnv
 
 Multi-agent environments
diff --git a/test/test_env.py b/test/test_env.py
index e151ddaae0c..dcf53af3765 100644
--- a/test/test_env.py
+++ b/test/test_env.py
@@ -79,6 +79,7 @@
     EnvBase,
     EnvCreator,
     ParallelEnv,
+    PendulumEnv,
     SerialEnv,
     TicTacToeEnv,
 )
@@ -3309,7 +3310,7 @@ def test_partial_rest(self, batched):
 
 
 class TestCustomEnvs:
-    def test_tictactoe(self):
+    def test_tictactoe_env(self):
         torch.manual_seed(0)
         env = TicTacToeEnv()
         check_env_specs(env)
@@ -3319,6 +3320,18 @@ def test_tictactoe(self):
             r = env.rollout(10, tensordict=TensorDict(batch_size=[5]))
             assert r.shape[-1] < 10
 
+    def test_pendulum_env(self):
+        env = PendulumEnv(device=None)
+        assert env.device is None
+        env = PendulumEnv(device="cpu")
+        assert env.device == torch.device("cpu")
+        check_env_specs(env)
+        for _ in range(10):
+            r = env.rollout(10)
+            assert r.shape == torch.Size((10,))
+            r = env.rollout(10, tensordict=TensorDict(batch_size=[5]))
+            assert r.shape == torch.Size((5, 10))
+
 
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
diff --git a/torchrl/envs/__init__.py b/torchrl/envs/__init__.py
index 748bef78d0b..cb1c9813ba0 100644
--- a/torchrl/envs/__init__.py
+++ b/torchrl/envs/__init__.py
@@ -5,7 +5,7 @@
 
 from .batched_envs import ParallelEnv, SerialEnv
 from .common import EnvBase, EnvMetaData, make_tensordict
-from .custom import TicTacToeEnv
+from .custom import PendulumEnv, TicTacToeEnv
 from .env_creator import EnvCreator, get_env_metadata
 from .gym_like import default_info_dict_reader, GymLikeEnv
 from .libs import (
diff --git a/torchrl/envs/custom/__init__.py b/torchrl/envs/custom/__init__.py
index c56a5ee5128..8649d3d3e97 100644
--- a/torchrl/envs/custom/__init__.py
+++ b/torchrl/envs/custom/__init__.py
@@ -3,4 +3,5 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from .pendulum import PendulumEnv
 from .tictactoeenv import TicTacToeEnv
diff --git a/torchrl/envs/custom/pendulum.py b/torchrl/envs/custom/pendulum.py
new file mode 100644
index 00000000000..8253e3df9b7
--- /dev/null
+++ b/torchrl/envs/custom/pendulum.py
@@ -0,0 +1,323 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import numpy as np
+
+import torch
+from tensordict import TensorDict, TensorDictBase
+from torchrl.data.tensor_specs import (
+    BoundedTensorSpec,
+    CompositeSpec,
+    UnboundedContinuousTensorSpec,
+)
+from torchrl.envs.common import EnvBase
+from torchrl.envs.utils import make_composite_from_td
+
+
+class PendulumEnv(EnvBase):
+    """A stateless Pendulum environment.
+
+    See the Pendulum tutorial for more details: :ref:`tutorial <pendulum_tuto>`.
+
+    Specs:
+        CompositeSpec(
+            output_spec: CompositeSpec(
+                full_observation_spec: CompositeSpec(
+                    th: BoundedTensorSpec(
+                        shape=torch.Size([]),
+                        space=ContinuousBox(
+                            low=Tensor(shape=torch.Size([]), dtype=torch.float32, contiguous=True),
+                            high=Tensor(shape=torch.Size([]), dtype=torch.float32, contiguous=True)),
+                        dtype=torch.float32,
+                        domain=continuous),
+                    thdot: BoundedTensorSpec(
+                        shape=torch.Size([]),
+                        space=ContinuousBox(
+                            low=Tensor(shape=torch.Size([]), dtype=torch.float32, contiguous=True),
+                            high=Tensor(shape=torch.Size([]), dtype=torch.float32, contiguous=True)),
+                        dtype=torch.float32,
+                        domain=continuous),
+                    params: CompositeSpec(
+                        max_speed: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.int64,
+                            domain=discrete),
+                        max_torque: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        dt: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        g: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        m: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        l: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        shape=torch.Size([])),
+                    shape=torch.Size([])),
+                full_reward_spec: CompositeSpec(
+                    reward: UnboundedContinuousTensorSpec(
+                        shape=torch.Size([1]),
+                        space=ContinuousBox(
+                            low=Tensor(shape=torch.Size([1]), dtype=torch.float32, contiguous=True),
+                            high=Tensor(shape=torch.Size([1]), dtype=torch.float32, contiguous=True)),
+                        dtype=torch.float32,
+                        domain=continuous),
+                    shape=torch.Size([])),
+                full_done_spec: CompositeSpec(
+                    done: DiscreteTensorSpec(
+                        shape=torch.Size([1]),
+                        space=DiscreteBox(n=2),
+                        dtype=torch.bool,
+                        domain=discrete),
+                    terminated: DiscreteTensorSpec(
+                        shape=torch.Size([1]),
+                        space=DiscreteBox(n=2),
+                        dtype=torch.bool,
+                        domain=discrete),
+                    shape=torch.Size([])),
+                shape=torch.Size([])),
+            input_spec: CompositeSpec(
+                full_state_spec: CompositeSpec(
+                    th: BoundedTensorSpec(
+                        shape=torch.Size([]),
+                        space=ContinuousBox(
+                            low=Tensor(shape=torch.Size([]), dtype=torch.float32, contiguous=True),
+                            high=Tensor(shape=torch.Size([]), dtype=torch.float32, contiguous=True)),
+                        dtype=torch.float32,
+                        domain=continuous),
+                    thdot: BoundedTensorSpec(
+                        shape=torch.Size([]),
+                        space=ContinuousBox(
+                            low=Tensor(shape=torch.Size([]), dtype=torch.float32, contiguous=True),
+                            high=Tensor(shape=torch.Size([]), dtype=torch.float32, contiguous=True)),
+                        dtype=torch.float32,
+                        domain=continuous),
+                    params: CompositeSpec(
+                        max_speed: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.int64,
+                            domain=discrete),
+                        max_torque: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        dt: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        g: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        m: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        l: UnboundedContinuousTensorSpec(
+                            shape=torch.Size([]),
+                            dtype=torch.float32,
+                            domain=continuous),
+                        shape=torch.Size([])),
+                    shape=torch.Size([])),
+                full_action_spec: CompositeSpec(
+                    action: BoundedTensorSpec(
+                        shape=torch.Size([1]),
+                        space=ContinuousBox(
+                            low=Tensor(shape=torch.Size([1]), dtype=torch.float32, contiguous=True),
+                            high=Tensor(shape=torch.Size([1]), dtype=torch.float32, contiguous=True)),
+                        dtype=torch.float32,
+                        domain=continuous),
+                    shape=torch.Size([])),
+                shape=torch.Size([])),
+            shape=torch.Size([]))
+
+    """
+
+    DEFAULT_X = np.pi
+    DEFAULT_Y = 1.0
+
+    metadata = {
+        "render_modes": ["human", "rgb_array"],
+        "render_fps": 30,
+    }
+    batch_locked = False
+
+    def __init__(self, td_params=None, seed=None, device=None):
+        if td_params is None:
+            td_params = self.gen_params()
+
+        super().__init__(device=device)
+        self._make_spec(td_params)
+        if seed is None:
+            seed = torch.empty((), dtype=torch.int64).random_().item()
+        self.set_seed(seed)
+
+    @classmethod
+    def _step(cls, tensordict):
+        th, thdot = tensordict["th"], tensordict["thdot"]  # th := theta
+
+        g_force = tensordict["params", "g"]
+        mass = tensordict["params", "m"]
+        length = tensordict["params", "l"]
+        dt = tensordict["params", "dt"]
+        u = tensordict["action"].squeeze(-1)
+        u = u.clamp(
+            -tensordict["params", "max_torque"], tensordict["params", "max_torque"]
+        )
+        costs = cls.angle_normalize(th) ** 2 + 0.1 * thdot**2 + 0.001 * (u**2)
+
+        new_thdot = (
+            thdot
+            + (3 * g_force / (2 * length) * th.sin() + 3.0 / (mass * length**2) * u)
+            * dt
+        )
+        new_thdot = new_thdot.clamp(
+            -tensordict["params", "max_speed"], tensordict["params", "max_speed"]
+        )
+        new_th = th + new_thdot * dt
+        reward = -costs.view(*tensordict.shape, 1)
+        done = torch.zeros_like(reward, dtype=torch.bool)
+        out = TensorDict(
+            {
+                "th": new_th,
+                "thdot": new_thdot,
+                "params": tensordict["params"],
+                "reward": reward,
+                "done": done,
+            },
+            tensordict.shape,
+        )
+        return out
+
+    def _reset(self, tensordict):
+        batch_size = (
+            tensordict.batch_size if tensordict is not None else self.batch_size
+        )
+        if tensordict is None or tensordict.is_empty():
+            # if no ``tensordict`` is passed, we generate a single set of hyperparameters
+            # Otherwise, we assume that the input ``tensordict`` contains all the relevant
+            # parameters to get started.
+            tensordict = self.gen_params(batch_size=batch_size)
+
+        high_th = torch.tensor(self.DEFAULT_X, device=self.device)
+        high_thdot = torch.tensor(self.DEFAULT_Y, device=self.device)
+        low_th = -high_th
+        low_thdot = -high_thdot
+
+        # for non batch-locked environments, the input ``tensordict`` shape dictates the number
+        # of simulators run simultaneously. In other contexts, the initial
+        # random state's shape will depend upon the environment batch-size instead.
+        th = (
+            torch.rand(tensordict.shape, generator=self.rng, device=self.device)
+            * (high_th - low_th)
+            + low_th
+        )
+        thdot = (
+            torch.rand(tensordict.shape, generator=self.rng, device=self.device)
+            * (high_thdot - low_thdot)
+            + low_thdot
+        )
+        out = TensorDict(
+            {
+                "th": th,
+                "thdot": thdot,
+                "params": tensordict["params"],
+            },
+            batch_size=batch_size,
+        )
+        return out
+
+    def _make_spec(self, td_params):
+        # Under the hood, this will populate self.output_spec["observation"]
+        self.observation_spec = CompositeSpec(
+            th=BoundedTensorSpec(
+                low=-torch.pi,
+                high=torch.pi,
+                shape=(),
+                dtype=torch.float32,
+            ),
+            thdot=BoundedTensorSpec(
+                low=-td_params["params", "max_speed"],
+                high=td_params["params", "max_speed"],
+                shape=(),
+                dtype=torch.float32,
+            ),
+            # we need to add the ``params`` to the observation specs, as we want
+            # to pass it at each step during a rollout
+            params=make_composite_from_td(
+                td_params["params"], unsqueeze_null_shapes=False
+            ),
+            shape=(),
+        )
+        # since the environment is stateless, we expect the previous output as input.
+        # For this, ``EnvBase`` expects some state_spec to be available
+        self.state_spec = self.observation_spec.clone()
+        # action-spec will be automatically wrapped in input_spec when
+        # `self.action_spec = spec` will be called supported
+        self.action_spec = BoundedTensorSpec(
+            low=-td_params["params", "max_torque"],
+            high=td_params["params", "max_torque"],
+            shape=(1,),
+            dtype=torch.float32,
+        )
+        self.reward_spec = UnboundedContinuousTensorSpec(shape=(*td_params.shape, 1))
+
+    def make_composite_from_td(td):
+        # custom function to convert a ``tensordict`` in a similar spec structure
+        # of unbounded values.
+        composite = CompositeSpec(
+            {
+                key: make_composite_from_td(tensor)
+                if isinstance(tensor, TensorDictBase)
+                else UnboundedContinuousTensorSpec(
+                    dtype=tensor.dtype, device=tensor.device, shape=tensor.shape
+                )
+                for key, tensor in td.items()
+            },
+            shape=td.shape,
+        )
+        return composite
+
+    def _set_seed(self, seed: int):
+        rng = torch.manual_seed(seed)
+        self.rng = rng
+
+    @staticmethod
+    def gen_params(g=10.0, batch_size=None) -> TensorDictBase:
+        """Returns a ``tensordict`` containing the physical parameters such as gravitational force and torque or speed limits."""
+        if batch_size is None:
+            batch_size = []
+        td = TensorDict(
+            {
+                "params": TensorDict(
+                    {
+                        "max_speed": 8,
+                        "max_torque": 2.0,
+                        "dt": 0.05,
+                        "g": g,
+                        "m": 1.0,
+                        "l": 1.0,
+                    },
+                    [],
+                )
+            },
+            [],
+        )
+        if batch_size:
+            td = td.expand(batch_size).contiguous()
+        return td
+
+    @staticmethod
+    def angle_normalize(x):
+        return ((x + torch.pi) % (2 * torch.pi)) - torch.pi
diff --git a/torchrl/envs/utils.py b/torchrl/envs/utils.py
index 61c210acffa..ee7649fabe4 100644
--- a/torchrl/envs/utils.py
+++ b/torchrl/envs/utils.py
@@ -869,11 +869,13 @@ def _sort_keys(element):
     return element
 
 
-def make_composite_from_td(data):
+def make_composite_from_td(data, unsqueeze_null_shapes: bool = True):
     """Creates a CompositeSpec instance from a tensordict, assuming all values are unbounded.
 
     Args:
         data (tensordict.TensorDict): a tensordict to be mapped onto a CompositeSpec.
+        unsqueeze_null_shapes (bool, optional): if ``True``, every empty shape will be
+            unsqueezed to (1,). Defaults to ``True``.
 
     Examples:
         >>> from tensordict import TensorDict
@@ -905,7 +907,9 @@ def make_composite_from_td(data):
             else UnboundedContinuousTensorSpec(
                 dtype=tensor.dtype,
                 device=tensor.device,
-                shape=tensor.shape if tensor.shape else [1],
+                shape=tensor.shape
+                if tensor.shape or not unsqueeze_null_shapes
+                else [1],
             )
             for key, tensor in data.items()
         },
diff --git a/tutorials/sphinx-tutorials/pendulum.py b/tutorials/sphinx-tutorials/pendulum.py
index 4eda4ea8e91..d25bc2cdd8a 100644
--- a/tutorials/sphinx-tutorials/pendulum.py
+++ b/tutorials/sphinx-tutorials/pendulum.py
@@ -73,6 +73,8 @@
 #   simulation graph.
 # * Finally, we will train a simple policy to solve the system we implemented.
 #
+# A built-in version of this environment can be found in class:`~torchrl.envs.PendulumEnv`.
+#
 
 # sphinx_gallery_start_ignore
 import warnings