pytorch · vmoens · Feb 21, 2024 · Feb 21, 2024 · Feb 21, 2024
diff --git a/examples/multiagent/iql.py b/examples/multiagent/iql.py
@@ -206,7 +206,7 @@ def train(cfg: "DictConfig"):  # noqa: F821
             and cfg.logger.backend
         ):
             evaluation_start = time.time()
-            with torch.no_grad() and set_exploration_type(ExplorationType.MEAN):
+            with torch.no_grad(), set_exploration_type(ExplorationType.MODE):
                 env_test.frames = []
                 rollouts = env_test.rollout(
                     max_steps=cfg.env.max_steps,

diff --git a/examples/multiagent/maddpg_iddpg.py b/examples/multiagent/maddpg_iddpg.py
@@ -230,7 +230,7 @@ def train(cfg: "DictConfig"):  # noqa: F821
             and cfg.logger.backend
         ):
             evaluation_start = time.time()
-            with torch.no_grad() and set_exploration_type(ExplorationType.MEAN):
+            with torch.no_grad(), set_exploration_type(ExplorationType.MODE):
                 env_test.frames = []
                 rollouts = env_test.rollout(
                     max_steps=cfg.env.max_steps,

diff --git a/examples/multiagent/mappo_ippo.py b/examples/multiagent/mappo_ippo.py
@@ -17,6 +17,7 @@
 from torchrl.data.replay_buffers.storages import LazyTensorStorage
 from torchrl.envs import RewardSum, TransformedEnv
 from torchrl.envs.libs.vmas import VmasEnv
+from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.modules import ProbabilisticActor, TanhNormal, ValueOperator
 from torchrl.modules.models.multiagent import MultiAgentMLP
 from torchrl.objectives import ClipPPOLoss, ValueEstimators
@@ -235,7 +236,7 @@ def train(cfg: "DictConfig"):  # noqa: F821
             and cfg.logger.backend
         ):
             evaluation_start = time.time()
-            with torch.no_grad():
+            with torch.no_grad(), set_exploration_type(ExplorationType.MODE):
                 env_test.frames = []
                 rollouts = env_test.rollout(
                     max_steps=cfg.env.max_steps,

diff --git a/examples/multiagent/qmix_vdn.py b/examples/multiagent/qmix_vdn.py
@@ -241,7 +241,7 @@ def train(cfg: "DictConfig"):  # noqa: F821
             and cfg.logger.backend
         ):
             evaluation_start = time.time()
-            with torch.no_grad() and set_exploration_type(ExplorationType.MEAN):
+            with torch.no_grad(), set_exploration_type(ExplorationType.MODE):
                 env_test.frames = []
                 rollouts = env_test.rollout(
                     max_steps=cfg.env.max_steps,

diff --git a/examples/multiagent/sac.py b/examples/multiagent/sac.py
@@ -300,7 +300,7 @@ def train(cfg: "DictConfig"):  # noqa: F821
             and cfg.logger.backend
         ):
             evaluation_start = time.time()
-            with torch.no_grad() and set_exploration_type(ExplorationType.MODE):
+            with torch.no_grad(), set_exploration_type(ExplorationType.MODE):
                 env_test.frames = []
                 rollouts = env_test.rollout(
                     max_steps=cfg.env.max_steps,