[BugFix] Discrete SAC rewrite #1461
Merged (18 commits) on Aug 30, 2023
update old sac example
Signed-off-by: Matteo Bettini <matbet@meta.com>
matteobettini committed Aug 26, 2023
commit 8365de78283c11baac4d8bc75d5bc354f392a0bc
examples/discrete_sac/discrete_sac.py (5 additions, 3 deletions)
@@ -9,7 +9,7 @@
 import torch
 import torch.cuda
 import tqdm
-from tensordict.nn import InteractionType
+from tensordict.nn import InteractionType, TensorDictModule
 
 from torch import nn, optim
 from torchrl.collectors import SyncDataCollector
@@ -27,7 +27,7 @@
 from torchrl.modules import MLP, SafeModule
 from torchrl.modules.distributions import OneHotCategorical
 
-from torchrl.modules.tensordict_module.actors import ProbabilisticActor, ValueOperator
+from torchrl.modules.tensordict_module.actors import ProbabilisticActor
 
 from torchrl.objectives import DiscreteSACLoss, SoftUpdate
 from torchrl.record.loggers import generate_exp_name, get_logger
@@ -150,8 +150,9 @@ def env_factory(num_workers):
         **qvalue_net_kwargs,
     )
 
-    qvalue = ValueOperator(
+    qvalue = TensorDictModule(
         in_keys=in_keys,
+        out_keys=["action_value"],
         module=qvalue_net,
     ).to(device)
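Note on this hunk: with ValueOperator gone, the Q-network becomes a plain TensorDictModule that writes one Q-value per discrete action under the "action_value" key, the entry the discrete SAC loss reads. A minimal sketch of the pattern (the observation size, action count, and "observation" in-key are assumptions for illustration, not the script's config):

```python
import torch
from tensordict import TensorDict
from tensordict.nn import TensorDictModule
from torchrl.modules import MLP

obs_dim, num_actions = 8, 4  # assumed sizes, for illustration only

# One Q-value per discrete action, written under "action_value"
qvalue_net = MLP(in_features=obs_dim, out_features=num_actions, num_cells=[64, 64])
qvalue = TensorDictModule(
    module=qvalue_net,
    in_keys=["observation"],
    out_keys=["action_value"],
)

td = TensorDict({"observation": torch.randn(2, obs_dim)}, batch_size=[2])
qvalue(td)  # writes td["action_value"] in place
print(td["action_value"].shape)  # torch.Size([2, 4])
```

ValueOperator defaults its out-key to "state_value", so the explicit out_keys=["action_value"] is what makes the module fit the discrete-action loss.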

@@ -171,6 +172,7 @@ def env_factory(num_workers):
     # Create SAC loss
     loss_module = DiscreteSACLoss(
         actor_network=model[0],
+        action_space=test_env.action_spec,
         qvalue_network=model[1],
         num_actions=num_actions,
         num_qvalue_nets=2,
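Note on this hunk: the loss is now told the action space explicitly rather than inferring it from the networks. A hedged sketch of the resulting construction, reusing the qvalue module from the sketch above; the one-hot spec, actor layout, and eps value are illustrative stand-ins for test_env.action_spec and the script's actual model and config:

```python
from torchrl.data import OneHotDiscreteTensorSpec
from torchrl.modules import ProbabilisticActor
from torchrl.modules.distributions import OneHotCategorical
from torchrl.objectives import DiscreteSACLoss, SoftUpdate

# Assumed actor: observation -> logits -> one-hot categorical policy
actor_net = MLP(in_features=obs_dim, out_features=num_actions, num_cells=[64, 64])
actor = ProbabilisticActor(
    module=TensorDictModule(actor_net, in_keys=["observation"], out_keys=["logits"]),
    in_keys=["logits"],
    distribution_class=OneHotCategorical,
    return_log_prob=True,
)

loss_module = DiscreteSACLoss(
    actor_network=actor,
    qvalue_network=qvalue,
    # the commit's key change: pass the action space explicitly
    # (a stand-in spec here; the script passes test_env.action_spec)
    action_space=OneHotDiscreteTensorSpec(num_actions),
    num_actions=num_actions,
    num_qvalue_nets=2,
)
target_net_updater = SoftUpdate(loss_module, eps=0.995)  # assumed smoothing value
```

Both num_actions and action_space are passed here, mirroring the diff; the explicit spec is the piece this commit adds.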