[Docs] Use more appropriate ActorValueOperator in PPOLoss documentation (pytorch#2350)
GaetanLepage authored Aug 2, 2024
1 parent 99332f5 commit 59d2ae1
Showing 1 changed file with 7 additions and 7 deletions.
torchrl/objectives/ppo.py (14 changes: 7 additions & 7 deletions)
@@ -141,7 +141,7 @@ class PPOLoss(LossModule):
  >>> actor_head = SomeActor(in_keys=["hidden"])
  >>> value_head = SomeValue(in_keys=["hidden"])
  >>> # first option, with 2 calls on the common module
- >>> model = ActorCriticOperator(common, actor_head, value_head)
+ >>> model = ActorValueOperator(common, actor_head, value_head)
  >>> loss_module = PPOLoss(model.get_policy_operator(), model.get_value_operator())
  >>> # second option, with a single call to the common module
  >>> loss_module = PPOLoss(ProbabilisticTensorDictSequential(model, actor_head), value_head)
@@ -718,10 +718,10 @@ class ClipPPOLoss(PPOLoss):
  >>> actor_head = SomeActor(in_keys=["hidden"])
  >>> value_head = SomeValue(in_keys=["hidden"])
  >>> # first option, with 2 calls on the common module
- >>> model = ActorCriticOperator(common, actor_head, value_head)
- >>> loss_module = PPOLoss(model.get_policy_operator(), model.get_value_operator())
+ >>> model = ActorValueOperator(common, actor_head, value_head)
+ >>> loss_module = ClipPPOLoss(model.get_policy_operator(), model.get_value_operator())
  >>> # second option, with a single call to the common module
- >>> loss_module = PPOLoss(ProbabilisticTensorDictSequential(model, actor_head), value_head)
+ >>> loss_module = ClipPPOLoss(ProbabilisticTensorDictSequential(model, actor_head), value_head)
  This will work regardless of whether separate_losses is activated or not.
@@ -955,10 +955,10 @@ class KLPENPPOLoss(PPOLoss):
  >>> actor_head = SomeActor(in_keys=["hidden"])
  >>> value_head = SomeValue(in_keys=["hidden"])
  >>> # first option, with 2 calls on the common module
- >>> model = ActorCriticOperator(common, actor_head, value_head)
- >>> loss_module = PPOLoss(model.get_policy_operator(), model.get_value_operator())
+ >>> model = ActorValueOperator(common, actor_head, value_head)
+ >>> loss_module = KLPENPPOLoss(model.get_policy_operator(), model.get_value_operator())
  >>> # second option, with a single call to the common module
- >>> loss_module = PPOLoss(ProbabilisticTensorDictSequential(model, actor_head), value_head)
+ >>> loss_module = KLPENPPOLoss(ProbabilisticTensorDictSequential(model, actor_head), value_head)
  This will work regardless of whether separate_losses is activated or not.
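For reference, a minimal self-contained sketch of the first pattern these docstrings describe: a shared backbone wrapped in an ActorValueOperator whose policy and value sub-operators are passed to PPOLoss. The layer sizes, key names, and toy networks below are illustrative assumptions, not part of the commit; ClipPPOLoss and KLPENPPOLoss take the same pair of arguments.

from torch import nn
from tensordict.nn import NormalParamExtractor, TensorDictModule
from torchrl.modules import (
    ActorValueOperator,
    ProbabilisticActor,
    TanhNormal,
    ValueOperator,
)
from torchrl.objectives import PPOLoss

# Shared backbone: "observation" -> "hidden" (sizes are arbitrary, for illustration only).
common = TensorDictModule(
    nn.Linear(4, 4), in_keys=["observation"], out_keys=["hidden"]
)

# Actor head: "hidden" -> ("loc", "scale") -> TanhNormal action distribution.
actor_head = ProbabilisticActor(
    TensorDictModule(
        nn.Sequential(nn.Linear(4, 8), NormalParamExtractor()),
        in_keys=["hidden"],
        out_keys=["loc", "scale"],
    ),
    in_keys=["loc", "scale"],
    distribution_class=TanhNormal,
    return_log_prob=True,
)

# Value head: "hidden" -> "state_value".
value_head = ValueOperator(nn.Linear(4, 1), in_keys=["hidden"])

# First option from the docstring: wrap everything in an ActorValueOperator and
# hand its policy/value sub-operators to the loss, so the backbone is shared.
model = ActorValueOperator(common, actor_head, value_head)
loss_module = PPOLoss(model.get_policy_operator(), model.get_value_operator())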
