[BugFix] Fix SAC (pytorch#1189)

ai4co · May 23, 2023
commit ae10bb8 · 1 parent 4ece06c

Showing 3 changed files with 8 additions and 16 deletions.
diff --git a/examples/sac/sac.py b/examples/sac/sac.py
@@ -4,8 +4,6 @@
 # LICENSE file in the root directory of this source tree.
 
 import dataclasses
-import uuid
-from datetime import datetime
 
 import hydra
 import torch.cuda
@@ -77,15 +75,6 @@ def main(cfg: "DictConfig"):  # noqa: F821
         else torch.device("cuda:0")
     )
 
-    exp_name = "_".join(
-        [
-            "SAC",
-            cfg.exp_name,
-            str(uuid.uuid4())[:8],
-            datetime.now().strftime("%y_%m_%d-%H_%M_%S"),
-        ]
-    )
-
     exp_name = generate_exp_name("SAC", cfg.exp_name)
     logger = get_logger(
         logger_type=cfg.logger, logger_name="sac_logging", experiment_name=exp_name

diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py
@@ -82,10 +82,10 @@ class SACLoss(LossModule):
             Default is ``False``.
         delay_qvalue (bool, optional): Whether to separate the target Q value
             networks from the Q value networks used for data collection.
-            Default is ``False``.
+            Default is ``True``.
         delay_value (bool, optional): Whether to separate the target value
             networks from the value networks used for data collection.
-            Default is ``False``.
+            Default is ``True``.
     """
 
     default_value_estimator = ValueEstimators.TD0
@@ -105,8 +105,8 @@ def __init__(
         fixed_alpha: bool = False,
         target_entropy: Union[str, float] = "auto",
         delay_actor: bool = False,
-        delay_qvalue: bool = False,
-        delay_value: bool = False,
+        delay_qvalue: bool = True,
+        delay_value: bool = True,
         gamma: float = None,
     ) -> None:
         if not _has_functorch:

diff --git a/torchrl/trainers/helpers/losses.py b/torchrl/trainers/helpers/losses.py
@@ -96,7 +96,10 @@ def make_sac_loss(model, cfg) -> Tuple[SACLoss, Optional[TargetNetUpdater]]:
         **loss_kwargs,
     )
     loss_module.make_value_estimator(gamma=cfg.gamma)
-    target_net_updater = make_target_updater(cfg, loss_module)
+    if cfg.loss == "double":
+        target_net_updater = make_target_updater(cfg, loss_module)
+    else:
+        target_net_updater = None
     return loss_module, target_net_updater