From c6ef08017bf0f4e756dabd730530a3f3e564ed5f Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Wed, 24 Jul 2024 21:07:13 +0100 Subject: [PATCH] [Doc] Fix algorithms references in tutos (#2320) --- tutorials/sphinx-tutorials/coding_ddpg.py | 10 +++++----- tutorials/sphinx-tutorials/coding_dqn.py | 8 ++++---- tutorials/sphinx-tutorials/coding_ppo.py | 2 +- tutorials/sphinx-tutorials/getting-started-1.py | 4 ++-- tutorials/sphinx-tutorials/getting-started-2.py | 6 +++--- tutorials/sphinx-tutorials/getting-started-3.py | 4 ++-- .../sphinx-tutorials/multiagent_competitive_ddpg.py | 4 ++-- tutorials/sphinx-tutorials/multiagent_ppo.py | 6 +++--- tutorials/sphinx-tutorials/torchrl_demo.py | 6 +++--- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 777a1dbd578..1bf7fd57e83 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -11,7 +11,7 @@ # Overview # -------- # -# TorchRL separates the training of RL sota-implementations in various pieces that will be +# TorchRL separates the training of RL algorithms in various pieces that will be # assembled in your training script: the environment, the data collection and # storage, the model and finally the loss function. # @@ -167,7 +167,7 @@ # the losses without it. However, we encourage its usage for the following # reason. # -# The reason TorchRL does this is that RL sota-implementations often execute the same +# The reason TorchRL does this is that RL algorithms often execute the same # model with different sets of parameters, called "trainable" and "target" # parameters. # The "trainable" parameters are those that the optimizer needs to fit. 
The @@ -272,7 +272,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): ############################################################################### -# The ``make_value_estimator`` method can but does not need to be called: ifgg +# The ``make_value_estimator`` method can but does not need to be called: if # not, the :class:`~torchrl.objectives.LossModule` will query this method with # its default estimator. # @@ -406,7 +406,7 @@ class DDPGLoss(LossModule): # Environment # ----------- # -# In most sota-implementations, the first thing that needs to be taken care of is the +# In most algorithms, the first thing that needs to be taken care of is the # construction of the environment as it conditions the remainder of the # training script. # @@ -1061,7 +1061,7 @@ def ceil_div(x, y): # Target network updater # ~~~~~~~~~~~~~~~~~~~~~~ # -# Target networks are a crucial part of off-policy RL sota-implementations. +# Target networks are a crucial part of off-policy RL algorithms. # Updating the target network parameters is made easy thanks to the # :class:`~torchrl.objectives.HardUpdate` and :class:`~torchrl.objectives.SoftUpdate` # classes. They're built with the loss module as argument, and the update is diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 3b9d712736a..e9f2085d3df 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -42,7 +42,7 @@ # estimated return; # - how to collect data from your environment efficiently and store them # in a replay buffer; -# - how to use multi-step, a simple preprocessing step for off-policy sota-implementations; +# - how to use multi-step, a simple preprocessing step for off-policy algorithms; # - and finally how to evaluate your model. 
# # **Prerequisites**: We encourage you to get familiar with torchrl through the @@ -365,7 +365,7 @@ def make_model(dummy_env): # Replay buffers # ~~~~~~~~~~~~~~ # -# Replay buffers play a central role in off-policy RL sota-implementations such as DQN. +# Replay buffers play a central role in off-policy RL algorithms such as DQN. # They constitute the dataset we will be sampling from during training. # # Here, we will use a regular sampling strategy, although a prioritized RB @@ -471,13 +471,13 @@ def get_collector( # Target parameters # ~~~~~~~~~~~~~~~~~ # -# Many off-policy RL sota-implementations use the concept of "target parameters" when it +# Many off-policy RL algorithms use the concept of "target parameters" when it # comes to estimate the value of the next state or state-action pair. # The target parameters are lagged copies of the model parameters. Because # their predictions mismatch those of the current model configuration, they # help learning by putting a pessimistic bound on the value being estimated. # This is a powerful trick (known as "Double Q-Learning") that is ubiquitous -# in similar sota-implementations. +# in similar algorithms. # diff --git a/tutorials/sphinx-tutorials/coding_ppo.py b/tutorials/sphinx-tutorials/coding_ppo.py index 40f71798a99..51229e1880d 100644 --- a/tutorials/sphinx-tutorials/coding_ppo.py +++ b/tutorials/sphinx-tutorials/coding_ppo.py @@ -518,7 +518,7 @@ # Replay buffer # ------------- # -# Replay buffers are a common building piece of off-policy RL sota-implementations. +# Replay buffers are a common building piece of off-policy RL algorithms. # In on-policy contexts, a replay buffer is refilled every time a batch of # data is collected, and its data is repeatedly consumed for a certain number # of epochs. 
diff --git a/tutorials/sphinx-tutorials/getting-started-1.py b/tutorials/sphinx-tutorials/getting-started-1.py index 4cd35c9bbe7..437cae26c42 100644 --- a/tutorials/sphinx-tutorials/getting-started-1.py +++ b/tutorials/sphinx-tutorials/getting-started-1.py @@ -117,7 +117,7 @@ # Probabilistic policies # ---------------------- # -# Policy-optimization sota-implementations like +# Policy-optimization algorithms like # `PPO `_ require the policy to be # stochastic: unlike in the examples above, the module now encodes a map from # the observation space to a parameter space encoding a distribution over the @@ -161,7 +161,7 @@ # # - Since we asked for it during the construction of the actor, the # log-probability of the actions given the distribution at that time is -# also written. This is necessary for sota-implementations like PPO. +# also written. This is necessary for algorithms like PPO. # - The parameters of the distribution are returned within the output # tensordict too under the ``"loc"`` and ``"scale"`` entries. # diff --git a/tutorials/sphinx-tutorials/getting-started-2.py b/tutorials/sphinx-tutorials/getting-started-2.py index 22154cf4726..84fefc8197a 100644 --- a/tutorials/sphinx-tutorials/getting-started-2.py +++ b/tutorials/sphinx-tutorials/getting-started-2.py @@ -39,9 +39,9 @@ # ---------------------- # # In RL, innovation typically involves the exploration of novel methods -# for optimizing a policy (i.e., new sota-implementations), rather than focusing +# for optimizing a policy (i.e., new algorithms), rather than focusing # on new architectures, as seen in other domains. Within TorchRL, -# these sota-implementations are encapsulated within loss modules. A loss +# these algorithms are encapsulated within loss modules. A loss # module orchestrates the various components of your algorithm and # yields a set of loss values that can be backpropagated # through to train the corresponding components. 
@@ -145,7 +145,7 @@ # ----------------------------------------- # # Another important aspect to consider is the presence of target parameters -# in off-policy sota-implementations like DDPG. Target parameters typically represent +# in off-policy algorithms like DDPG. Target parameters typically represent # a delayed or smoothed version of the parameters over time, and they play # a crucial role in value estimation during policy training. Utilizing target # parameters for policy training often proves to be significantly more diff --git a/tutorials/sphinx-tutorials/getting-started-3.py b/tutorials/sphinx-tutorials/getting-started-3.py index ad6f6525a7c..3bd5c6ea5c3 100644 --- a/tutorials/sphinx-tutorials/getting-started-3.py +++ b/tutorials/sphinx-tutorials/getting-started-3.py @@ -29,7 +29,7 @@ # dataloaders are referred to as ``DataCollectors``. Most of the time, # data collection does not stop at the collection of raw data, # as the data needs to be stored temporarily in a buffer -# (or equivalent structure for on-policy sota-implementations) before being consumed +# (or equivalent structure for on-policy algorithms) before being consumed # by the :ref:`loss module `. This tutorial will explore # these two classes. # @@ -93,7 +93,7 @@ ################################# # Data collectors are very useful when it comes to coding state-of-the-art -# sota-implementations, as performance is usually measured by the capability of a +# algorithms, as performance is usually measured by the capability of a # specific technique to solve a problem in a given number of interactions with # the environment (the ``total_frames`` argument in the collector). 
# For this reason, most training loops in our examples look like this: diff --git a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py index 2600df2f752..77574b765e7 100644 --- a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py +++ b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py @@ -651,7 +651,7 @@ # Replay buffer # ------------- # -# Replay buffers are a common building piece of off-policy RL sota-implementations. +# Replay buffers are a common building piece of off-policy RL algorithms. # There are many types of buffers, in this tutorial we use a basic buffer to store and sample tensordict # data randomly. # @@ -925,7 +925,7 @@ def process_batch(batch: TensorDictBase) -> TensorDictBase: # # Now that you are proficient with multi-agent DDPG, you can check out all the TorchRL multi-agent implementations in the # GitHub repository. -# These are code-only scripts of many MARL sota-implementations such as the ones seen in this tutorial, +# These are code-only scripts of many MARL algorithms such as the ones seen in this tutorial, # QMIX, MADDPG, IQL, and many more! # # Also do remember to check out our tutorial: :doc:`/tutorials/multiagent_ppo`. diff --git a/tutorials/sphinx-tutorials/multiagent_ppo.py b/tutorials/sphinx-tutorials/multiagent_ppo.py index b163a5df64f..d7d906a4fb0 100644 --- a/tutorials/sphinx-tutorials/multiagent_ppo.py +++ b/tutorials/sphinx-tutorials/multiagent_ppo.py @@ -55,7 +55,7 @@ # the foundational policy-optimization algorithm. For more information, see the # `Proximal Policy Optimization Algorithms `_ paper. # -# This type of sota-implementations is usually trained *on-policy*. This means that, at every learning iteration, we have a +# This type of algorithm is usually trained *on-policy*. This means that, at every learning iteration, we have a # **sampling** and a **training** phase.
In the **sampling** phase of iteration :math:`t`, rollouts are collected # from agents' interactions in the environment using the current policies :math:`\mathbf{\pi}_t`. # In the **training** phase, all the collected rollouts are immediately fed to the training process to perform @@ -551,7 +551,7 @@ # Replay buffer # ------------- # -# Replay buffers are a common building piece of off-policy RL sota-implementations. +# Replay buffers are a common building piece of off-policy RL algorithms. # In on-policy contexts, a replay buffer is refilled every time a batch of # data is collected, and its data is repeatedly consumed for a certain number # of epochs. @@ -780,7 +780,7 @@ # # Now that you are proficient with multi-agent PPO, you can check out all the TorchRL multi-agent implementations in the # GitHub repository. -# These are code-only scripts of many popular MARL sota-implementations such as the ones seen in this tutorial, +# These are code-only scripts of many popular MARL algorithms such as the ones seen in this tutorial, # QMIX, MADDPG, IQL, and many more! # # You can also check out our other multi-agent tutorial on how to train competitive diff --git a/tutorials/sphinx-tutorials/torchrl_demo.py b/tutorials/sphinx-tutorials/torchrl_demo.py index 62419cbb3ef..99ede5dd56f 100644 --- a/tutorials/sphinx-tutorials/torchrl_demo.py +++ b/tutorials/sphinx-tutorials/torchrl_demo.py @@ -162,13 +162,13 @@ # │ └── "trainers.py" # └── "version.py" # -# Unlike other domains, RL is less about media than *sota-implementations*. As such, it +# Unlike other domains, RL is less about media than *algorithms*. As such, it # is harder to make truly independent components. # # What TorchRL is not: # -# * a collection of sota-implementations: we do not intend to provide SOTA implementations of RL sota-implementations, -# but we provide these sota-implementations only as examples of how to use the library. 
+# * a collection of algorithms: we do not intend to provide SOTA implementations of RL algorithms, +# but we provide these algorithms only as examples of how to use the library. # # * a research framework: modularity in TorchRL comes in two flavours. First, we try # to build re-usable components, such that they can be easily swapped with each other.