lint

pytorch · vmoens · Sep 3, 2023 · Sep 2, 2023 · Sep 2, 2023 · Sep 2, 2023
commit 1332824f8c80a8bb6b2cb57c5514587e04c37d50
diff --git a/test/test_libs.py b/test/test_libs.py
@@ -294,17 +294,11 @@ def info_reader(info, tensordict):
 
     def test_one_hot_and_categorical(self):
         # tests that one-hot and categorical work ok when an integer is expected as action
-        cliff_walking = GymEnv(
-            'CliffWalking-v0',
-            categorical_action_encoding=True
-            )
+        cliff_walking = GymEnv("CliffWalking-v0", categorical_action_encoding=True)
         cliff_walking.rollout(10)
         check_env_specs(cliff_walking)
 
-        cliff_walking = GymEnv(
-            'CliffWalking-v0',
-            categorical_action_encoding=False
-            )
+        cliff_walking = GymEnv("CliffWalking-v0", categorical_action_encoding=False)
         cliff_walking.rollout(10)
         check_env_specs(cliff_walking)
 

diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py
@@ -1261,9 +1261,6 @@ def to_numpy(self, val: torch.Tensor, safe: bool = None) -> np.ndarray:
             for _v in val.view(-1):
                 vals.append(inv_reg[int(_v)])
             return np.array(vals).reshape(tuple(val.shape))
-        if val.size == 1:
-            # some envs require an integer for indexing
-            val = int(val)
         return val
 
     def index(self, index: INDEX_TYPING, tensor_to_index: torch.Tensor) -> torch.Tensor:
@@ -2099,8 +2096,8 @@ def __eq__(self, other):
     def to_numpy(self, val: torch.Tensor, safe: bool = None) -> dict:
         if safe is None:
             safe = _CHECK_SPEC_ENCODE
-        if not val.shape and not safe:
-            return val.item()
+        # Disabled shortcut: a shapeless `val` returned `val.item()` when not safe.
+        # Every input now goes through the parent implementation below.
         return super().to_numpy(val, safe)
 
     def to_one_hot(self, val: torch.Tensor, safe: bool = None) -> torch.Tensor:

diff --git a/torchrl/envs/libs/gym.py b/torchrl/envs/libs/gym.py
@@ -419,6 +419,16 @@ def _build_env(
             env = self._build_gym_env(env, pixels_only)
         return env
 
+    def read_action(self, action):
+        action = super().read_action(action)
+        if (
+            isinstance(self.action_spec, (OneHotDiscreteTensorSpec, DiscreteTensorSpec))
+            and action.size == 1
+        ):
+            # unwrap one-element actions to int: some envs use the action as an index
+            action = int(action)
+        return action
+
     @implement_for("gym", None, "0.19.0")
     def _build_gym_env(self, env, pixels_only):  # noqa: F811
         from .utils import GymPixelObservationWrapper as PixelObservationWrapper