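Test fixes: use a custom Q-value module in the test_cost.py mock setup; drop the duplicated log_prob call in sac.py compute_log_prob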

pytorch · vmoens · Oct 11, 2024 · Sep 22, 2024 · Sep 22, 2024 · Sep 22, 2024
commit c8c9cc8c6905d89f7dfbee9cb170990203e301cd
diff --git a/test/test_cost.py b/test/test_cost.py
@@ -3541,6 +3541,18 @@ def _create_mock_common_layer_setup(
         n_hidden=2,
         composite_action_dist=False,
     ):
+        class QValueClass(nn.Module):  # Linear -> ReLU -> Linear head mapping (obs, act) to 1 output feature
+            def __init__(self):
+                super().__init__()
+                self.linear1 = nn.Linear(n_hidden + n_act, n_hidden)  # sized for cat([obs, act], -1); n_hidden/n_act come from the enclosing function
+                self.relu = nn.ReLU()
+                self.linear2 = nn.Linear(n_hidden, 1)
+
+            def forward(self, obs, act):
+                if isinstance(act, TensorDictBase):  # act may arrive as a tensordict instead of a plain tensor
+                    act = act.get("action1")  # only the "action1" entry is used; presumably the composite-action case -- TODO confirm
+                return self.linear2(self.relu(self.linear1(torch.cat([obs, act], -1))))
+
         common = MLP(
             num_cells=ncells,
             in_features=n_obs,
@@ -3553,12 +3565,7 @@ def _create_mock_common_layer_setup(
             depth=1,
             out_features=2 * n_act,
         )
-        qvalue = MLP(
-            in_features=n_hidden + n_act,
-            num_cells=ncells,
-            depth=1,
-            out_features=1,
-        )
+        qvalue = QValueClass()
         batch = [batch]
         action = torch.randn(*batch, n_act)
         td = TensorDict(

diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py
@@ -51,9 +51,7 @@ def compute_log_prob(action_dist, action_or_tensordict, tensor_key):
     if isinstance(action_or_tensordict, torch.Tensor):
         log_p = action_dist.log_prob(action_or_tensordict)
     else:
-        tensordict = action_dist.log_prob(action_or_tensordict)
-        log_p = tensordict.get(tensor_key)
-        maybe_log_prob = action_dist.log_prob(tensordict)
+        maybe_log_prob = action_dist.log_prob(action_or_tensordict)
         if not isinstance(maybe_log_prob, torch.Tensor):
             log_p = maybe_log_prob.get(tensor_key)
         else: