feat(specs): make environment specs managed attributes (#220)

Co-authored-by: Sasha <reallysasha@gmail.com>
instadeepai · Mar 21, 2024 · 1f38fe6
1 parent 0247608
commit 1f38fe6
Show file tree

Hide file tree

Showing 87 changed files with 699 additions and 345 deletions.
diff --git a/README.md b/README.md
@@ -172,7 +172,7 @@ state, timestep = jax.jit(env.reset)(key)
 env.render(state)
 
 # Interact with the (jit-able) environment
-action = env.action_spec().generate_value()          # Action selection (dummy value here)
+action = env.action_spec.generate_value()          # Action selection (dummy value here)
 state, timestep = jax.jit(env.step)(state, action)   # Take a step and observe the next state and time step
 ```
 

diff --git a/docs/guides/advanced_usage.md b/docs/guides/advanced_usage.md
@@ -16,7 +16,7 @@ env = AutoResetWrapper(env)     # Automatically reset the environment when an ep
 
 batch_size = 7
 rollout_length = 5
-num_actions = env.action_spec().num_values
+num_actions = env.action_spec.num_values
 
 random_key = jax.random.PRNGKey(0)
 key1, key2 = jax.random.split(random_key)

diff --git a/docs/guides/wrappers.md b/docs/guides/wrappers.md
@@ -13,7 +13,7 @@ env = jumanji.make("Snake-6x6-v0")
 dm_env = jumanji.wrappers.JumanjiToDMEnvWrapper(env)
 
 timestep = dm_env.reset()
-action = dm_env.action_spec().generate_value()
+action = dm_env.action_spec.generate_value()
 next_timestep = dm_env.step(action)
 ...
 ```
@@ -52,7 +52,7 @@ key = jax.random.PRNGKey(0)
 state, timestep = env.reset(key)
 print("New episode")
 for i in range(100):
-    action = env.action_spec().generate_value()  # Returns jnp.array(0) when using Snake.
+    action = env.action_spec.generate_value()  # Returns jnp.array(0) when using Snake.
     state, timestep = env.step(state, action)
     if timestep.first():
         print("New episode")

diff --git a/jumanji/env.py b/jumanji/env.py
@@ -17,13 +17,14 @@
 from __future__ import annotations
 
 import abc
+from functools import cached_property
 from typing import Any, Generic, Tuple, TypeVar
 
 import chex
 from typing_extensions import Protocol
 
 from jumanji import specs
-from jumanji.types import TimeStep
+from jumanji.types import Observation, TimeStep
 
 
 class StateProtocol(Protocol):
@@ -33,9 +34,10 @@ class StateProtocol(Protocol):
 
 
 State = TypeVar("State", bound="StateProtocol")
+ActionSpec = TypeVar("ActionSpec", bound=specs.Array)
 
 
-class Environment(abc.ABC, Generic[State]):
+class Environment(abc.ABC, Generic[State, ActionSpec, Observation]):
     """Environment written in Jax that differs from the gym API to make the step and
     reset functions jittable. The state contains all the dynamics and data needed to step
     the environment, no computation stored in attributes of self.
@@ -45,8 +47,15 @@ class Environment(abc.ABC, Generic[State]):
     def __repr__(self) -> str:
         return "Environment."
 
+    def __init__(self) -> None:
+        """Initialize environment."""
+        self.observation_spec
+        self.action_spec
+        self.reward_spec
+        self.discount_spec
+
     @abc.abstractmethod
-    def reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep]:
+    def reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep[Observation]]:
         """Resets the environment to an initial state.
 
         Args:
@@ -58,7 +67,9 @@ def reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep]:
         """
 
     @abc.abstractmethod
-    def step(self, state: State, action: chex.Array) -> Tuple[State, TimeStep]:
+    def step(
+        self, state: State, action: chex.Array
+    ) -> Tuple[State, TimeStep[Observation]]:
         """Run one timestep of the environment's dynamics.
 
         Args:
@@ -71,33 +82,35 @@ def step(self, state: State, action: chex.Array) -> Tuple[State, TimeStep]:
         """
 
     @abc.abstractmethod
-    def observation_spec(self) -> specs.Spec:
+    @cached_property
+    def observation_spec(self) -> specs.Spec[Observation]:
         """Returns the observation spec.
 
         Returns:
-            observation_spec: a NestedSpec tree of spec.
+            observation_spec: a potentially nested `Spec` structure representing the observation.
         """
 
     @abc.abstractmethod
-    def action_spec(self) -> specs.Spec:
+    @cached_property
+    def action_spec(self) -> ActionSpec:
         """Returns the action spec.
 
         Returns:
-            action_spec: a NestedSpec tree of spec.
+            action_spec: a potentially nested `Spec` structure representing the action.
         """
 
+    @cached_property
     def reward_spec(self) -> specs.Array:
-        """Describes the reward returned by the environment. By default, this is assumed to be a
-        single float.
+        """Returns the reward spec. By default, this is assumed to be a single float.
 
         Returns:
             reward_spec: a `specs.Array` spec.
         """
         return specs.Array(shape=(), dtype=float, name="reward")
 
+    @cached_property
     def discount_spec(self) -> specs.BoundedArray:
-        """Describes the discount returned by the environment. By default, this is assumed to be a
-        single float between 0 and 1.
+        """Returns the discount spec. By default, this is assumed to be a single float between 0 and 1.
 
         Returns:
             discount_spec: a `specs.BoundedArray` spec.
@@ -107,7 +120,7 @@ def discount_spec(self) -> specs.BoundedArray:
         )
 
     @property
-    def unwrapped(self) -> Environment:
+    def unwrapped(self) -> Environment[State, ActionSpec, Observation]:
         return self
 
     def render(self, state: State) -> Any:

diff --git a/jumanji/environments/logic/game_2048/env.py b/jumanji/environments/logic/game_2048/env.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from functools import cached_property
 from typing import Optional, Sequence, Tuple
 
 import chex
@@ -29,7 +30,7 @@
 from jumanji.viewer import Viewer
 
 
-class Game2048(Environment[State]):
+class Game2048(Environment[State, specs.DiscreteArray, Observation]):
     """Environment for the game 2048. The game consists of a board of size board_size x board_size
     (4x4 by default) in which the player can take actions to move the tiles on the board up, down,
     left, or right. The goal of the game is to combine tiles with the same number to create a tile
@@ -69,7 +70,7 @@ class Game2048(Environment[State]):
     key = jax.random.PRNGKey(0)
     state, timestep = jax.jit(env.reset)(key)
     env.render(state)
-    action = env.action_spec().generate_value()
+    action = env.action_spec.generate_value()
     state, timestep = jax.jit(env.step)(state, action)
     env.render(state)
     ```
@@ -85,6 +86,7 @@ def __init__(
             viewer: `Viewer` used for rendering. Defaults to `Game2048Viewer`.
         """
         self.board_size = board_size
+        super().__init__()
 
         # Create viewer used for rendering
         self._viewer = viewer or Game2048Viewer("2048", board_size)
@@ -97,6 +99,7 @@ def __repr__(self) -> str:
         """
         return f"2048 Game(board_size={self.board_size})"
 
+    @cached_property
     def observation_spec(self) -> specs.Spec[Observation]:
         """Specifications of the observation of the `Game2048` environment.
 
@@ -122,6 +125,7 @@ def observation_spec(self) -> specs.Spec[Observation]:
             ),
         )
 
+    @cached_property
     def action_spec(self) -> specs.DiscreteArray:
         """Returns the action spec.
 

diff --git a/jumanji/environments/logic/game_2048/env_test.py b/jumanji/environments/logic/game_2048/env_test.py
@@ -19,7 +19,10 @@
 
 from jumanji.environments.logic.game_2048.env import Game2048
 from jumanji.environments.logic.game_2048.types import Board, State
-from jumanji.testing.env_not_smoke import check_env_does_not_smoke
+from jumanji.testing.env_not_smoke import (
+    check_env_does_not_smoke,
+    check_env_specs_does_not_smoke,
+)
 from jumanji.testing.pytrees import assert_is_jax_array_tree
 from jumanji.types import TimeStep
 
@@ -154,3 +157,8 @@ def test_game_2048__get_action_mask(game_2048: Game2048, board: Board) -> None:
 def test_game_2048__does_not_smoke(game_2048: Game2048) -> None:
     """Test that we can run an episode without any errors."""
     check_env_does_not_smoke(game_2048)
+
+
+def test_game_2048__specs_does_not_smoke(game_2048: Game2048) -> None:
+    """Test that we can access specs without any errors."""
+    check_env_specs_does_not_smoke(game_2048)
diff --git a/jumanji/environments/logic/graph_coloring/env.py b/jumanji/environments/logic/graph_coloring/env.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from functools import cached_property
 from typing import Optional, Sequence, Tuple
 
 import chex
@@ -33,7 +34,7 @@
 from jumanji.viewer import Viewer
 
 
-class GraphColoring(Environment[State]):
+class GraphColoring(Environment[State, specs.DiscreteArray, Observation]):
     """Environment for the GraphColoring problem.
     The problem is a combinatorial optimization task where the goal is
       to assign a color to each vertex of a graph
@@ -76,7 +77,7 @@ class GraphColoring(Environment[State]):
     key = jax.random.PRNGKey(0)
     state, timestep = jax.jit(env.reset)(key)
     env.render(state)
-    action = env.action_spec().generate_value()
+    action = env.action_spec.generate_value()
     state, timestep = jax.jit(env.step)(state, action)
     env.render(state)
     ```
@@ -100,6 +101,7 @@ def __init__(
             num_nodes=20, edge_probability=0.8
         )
         self.num_nodes = self.generator.num_nodes
+        super().__init__()
 
         # Create viewer used for rendering
         self._env_viewer = viewer or GraphColoringViewer(name="GraphColoring")
@@ -206,6 +208,7 @@ def step(
         )
         return next_state, timestep
 
+    @cached_property
     def observation_spec(self) -> specs.Spec[Observation]:
         """Returns the observation spec.
 
@@ -253,6 +256,7 @@ def observation_spec(self) -> specs.Spec[Observation]:
             ),
         )
 
+    @cached_property
     def action_spec(self) -> specs.DiscreteArray:
         """Specification of the action for the `GraphColoring` environment.
 

diff --git a/jumanji/environments/logic/graph_coloring/env_test.py b/jumanji/environments/logic/graph_coloring/env_test.py
@@ -18,7 +18,10 @@
 
 from jumanji.environments.logic.graph_coloring import GraphColoring
 from jumanji.environments.logic.graph_coloring.types import State
-from jumanji.testing.env_not_smoke import check_env_does_not_smoke
+from jumanji.testing.env_not_smoke import (
+    check_env_does_not_smoke,
+    check_env_specs_does_not_smoke,
+)
 from jumanji.testing.pytrees import assert_is_jax_array_tree
 from jumanji.types import TimeStep
 
@@ -90,3 +93,8 @@ def test_graph_coloring_get_action_mask(graph_coloring: GraphColoring) -> None:
 def test_graph_coloring_does_not_smoke(graph_coloring: GraphColoring) -> None:
     """Test that we can run an episode without any errors."""
     check_env_does_not_smoke(graph_coloring)
+
+
+def test_graph_coloring_specs_does_not_smoke(graph_coloring: GraphColoring) -> None:
+    """Test that we can access specs without any errors."""
+    check_env_specs_does_not_smoke(graph_coloring)
diff --git a/jumanji/environments/logic/minesweeper/env.py b/jumanji/environments/logic/minesweeper/env.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from functools import cached_property
 from typing import Optional, Sequence, Tuple
 
 import chex
@@ -36,7 +37,7 @@
 from jumanji.viewer import Viewer
 
 
-class Minesweeper(Environment[State]):
+class Minesweeper(Environment[State, specs.MultiDiscreteArray, Observation]):
     """A JAX implementation of the minesweeper game.
 
     - observation: `Observation`
@@ -81,7 +82,7 @@ class Minesweeper(Environment[State]):
     key = jax.random.PRNGKey(0)
     state, timestep = jax.jit(env.reset)(key)
     env.render(state)
-    action = env.action_spec().generate_value()
+    action = env.action_spec.generate_value()
     state, timestep = jax.jit(env.step)(state, action)
     env.render(state)
     ```
@@ -127,6 +128,7 @@ def __init__(
         self.num_rows = self.generator.num_rows
         self.num_cols = self.generator.num_cols
         self.num_mines = self.generator.num_mines
+        super().__init__()
         self._viewer = viewer or MinesweeperViewer(
             num_rows=self.num_rows, num_cols=self.num_cols
         )
@@ -182,6 +184,7 @@ def step(
         )
         return next_state, next_timestep
 
+    @cached_property
     def observation_spec(self) -> specs.Spec[Observation]:
         """Specifications of the observation of the `Minesweeper` environment.
 
@@ -229,6 +232,7 @@ def observation_spec(self) -> specs.Spec[Observation]:
             step_count=step_count,
         )
 
+    @cached_property
     def action_spec(self) -> specs.MultiDiscreteArray:
         """Returns the action spec.
         An action consists of the height and width of the square to be explored.

diff --git a/jumanji/environments/logic/minesweeper/env_test.py b/jumanji/environments/logic/minesweeper/env_test.py
@@ -24,7 +24,10 @@
 
 from jumanji.environments.logic.minesweeper.env import Minesweeper
 from jumanji.environments.logic.minesweeper.types import State
-from jumanji.testing.env_not_smoke import check_env_does_not_smoke
+from jumanji.testing.env_not_smoke import (
+    check_env_does_not_smoke,
+    check_env_specs_does_not_smoke,
+)
 from jumanji.testing.pytrees import assert_is_jax_array_tree
 from jumanji.types import StepType, TimeStep
 
@@ -123,7 +126,7 @@ def test_minesweeper__step(minesweeper_env: Minesweeper) -> None:
     key = jax.random.PRNGKey(0)
     state, timestep = jax.jit(minesweeper_env.reset)(key)
     # For this board, this action will be a non-mined square
-    action = minesweeper_env.action_spec().generate_value()
+    action = minesweeper_env.action_spec.generate_value()
     next_state, next_timestep = step_fn(state, action)
 
     # Check that the state has changed
@@ -154,6 +157,11 @@ def test_minesweeper__does_not_smoke(minesweeper_env: Minesweeper) -> None:
     check_env_does_not_smoke(env=minesweeper_env)
 
 
+def test_minesweeper__specs_does_not_smoke(minesweeper_env: Minesweeper) -> None:
+    """Test that we can access specs without any errors."""
+    check_env_specs_does_not_smoke(minesweeper_env)
+
+
 def test_minesweeper__render(
     monkeypatch: pytest.MonkeyPatch, minesweeper_env: Minesweeper
 ) -> None:
@@ -162,7 +170,7 @@ def test_minesweeper__render(
     state, timestep = jax.jit(minesweeper_env.reset)(jax.random.PRNGKey(0))
     minesweeper_env.render(state)
     minesweeper_env.close()
-    action = minesweeper_env.action_spec().generate_value()
+    action = minesweeper_env.action_spec.generate_value()
     state, timestep = jax.jit(minesweeper_env.step)(state, action)
     minesweeper_env.render(state)
     minesweeper_env.close()
@@ -171,7 +179,7 @@ def test_minesweeper__render(
 def test_minesweeper__done_invalid_action(minesweeper_env: Minesweeper) -> None:
     """Test that the strict done signal is sent correctly"""
     # Note that this action corresponds to not stepping on a mine
-    action = minesweeper_env.action_spec().generate_value()
+    action = minesweeper_env.action_spec.generate_value()
     *_, episode_length = play_and_get_episode_stats(
         env=minesweeper_env, actions=[action for _ in range(10)], time_limit=10
     )