From 25e8bd20c24dc54fc9b3882b26e81a0e2c85544c Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Tue, 13 Aug 2024 21:16:01 +0100 Subject: [PATCH] [Deprecation] Deprecate default num_cells in MLP (#2395) --- docs/source/reference/collectors.rst | 2 +- docs/source/reference/data.rst | 2 +- docs/source/reference/envs.rst | 10 ++++---- docs/source/reference/modules.rst | 2 +- .../distributed_replay_buffer.py | 4 ++-- sota-implementations/redq/redq.py | 2 +- test/_utils_internal.py | 4 ++-- test/test_cost.py | 7 +++++- test/test_libs.py | 2 +- test/test_transforms.py | 2 +- torchrl/collectors/collectors.py | 6 ++--- torchrl/collectors/distributed/ray.py | 2 +- torchrl/data/datasets/openx.py | 4 ++-- torchrl/data/replay_buffers/replay_buffers.py | 4 ++-- torchrl/data/replay_buffers/samplers.py | 2 +- torchrl/data/replay_buffers/storages.py | 2 +- torchrl/data/replay_buffers/utils.py | 2 +- torchrl/data/tensor_specs.py | 4 ++-- torchrl/envs/gym_like.py | 4 ++-- torchrl/envs/libs/pettingzoo.py | 4 ++-- torchrl/envs/transforms/transforms.py | 24 +++++++++---------- torchrl/envs/utils.py | 6 ++--- torchrl/modules/distributions/continuous.py | 4 ++-- torchrl/modules/models/batchrenorm.py | 4 ++-- torchrl/modules/models/exploration.py | 2 +- torchrl/modules/models/models.py | 10 +++++++- torchrl/modules/tensordict_module/common.py | 4 ++-- .../modules/tensordict_module/exploration.py | 4 ++-- torchrl/modules/tensordict_module/rnn.py | 12 +++++----- torchrl/objectives/cql.py | 2 +- torchrl/objectives/deprecated.py | 2 +- torchrl/objectives/dqn.py | 2 +- torchrl/objectives/functional.py | 2 +- torchrl/objectives/iql.py | 2 +- torchrl/objectives/multiagent/qmixer.py | 2 +- torchrl/objectives/ppo.py | 8 +++---- torchrl/objectives/sac.py | 2 +- torchrl/objectives/utils.py | 2 +- torchrl/record/recorder.py | 4 ++-- torchrl/trainers/helpers/models.py | 2 +- torchrl/trainers/trainers.py | 4 ++-- tutorials/sphinx-tutorials/coding_dqn.py | 2 +- tutorials/sphinx-tutorials/coding_ppo.py | 
4 ++-- .../multiagent_competitive_ddpg.py | 12 +++++----- tutorials/sphinx-tutorials/multiagent_ppo.py | 10 ++++---- tutorials/sphinx-tutorials/rb_tutorial.py | 2 +- tutorials/sphinx-tutorials/torchrl_demo.py | 2 +- 47 files changed, 110 insertions(+), 97 deletions(-) diff --git a/docs/source/reference/collectors.rst b/docs/source/reference/collectors.rst index 74bd058b8f0..6380935e92e 100644 --- a/docs/source/reference/collectors.rst +++ b/docs/source/reference/collectors.rst @@ -45,7 +45,7 @@ worker) may also impact the memory management. The key parameters to control are :obj:`devices` which controls the execution devices (ie the device of the policy) and :obj:`storing_device` which will control the device where the environment and data are stored during a rollout. A good heuristic is usually to use the same device -for storage and compute, which is the default behaviour when only the `devices` argument +for storage and compute, which is the default behavior when only the `devices` argument is being passed. Besides those compute parameters, users may choose to configure the following parameters: diff --git a/docs/source/reference/data.rst b/docs/source/reference/data.rst index ed5639fcf59..6fbeada5bd0 100644 --- a/docs/source/reference/data.rst +++ b/docs/source/reference/data.rst @@ -171,7 +171,7 @@ using the following components: Storage choice is very influential on replay buffer sampling latency, especially in distributed reinforcement learning settings with larger data volumes. :class:`~torchrl.data.replay_buffers.storages.LazyMemmapStorage` is highly -advised in distributed settings with shared storage due to the lower serialisation +advised in distributed settings with shared storage due to the lower serialization cost of MemoryMappedTensors as well as the ability to specify file storage locations for improved node failure recovery. 
The following mean sampling latency improvements over using :class:`~torchrl.data.replay_buffers.ListStorage` diff --git a/docs/source/reference/envs.rst b/docs/source/reference/envs.rst index 283bd2a631b..a6add08d07d 100644 --- a/docs/source/reference/envs.rst +++ b/docs/source/reference/envs.rst @@ -318,7 +318,7 @@ have on an environment returning zeros after reset: We also offer the :class:`~.SerialEnv` class that enjoys the exact same API but is executed serially. This is mostly useful for testing purposes, when one wants to assess the -behaviour of a :class:`~.ParallelEnv` without launching the subprocesses. +behavior of a :class:`~.ParallelEnv` without launching the subprocesses. In addition to :class:`~.ParallelEnv`, which offers process-based parallelism, we also provide a way to create multithreaded environments with :obj:`~.MultiThreadedEnv`. This class uses `EnvPool `_ @@ -499,7 +499,7 @@ current episode. To handle these cases, torchrl provides a :class:`~torchrl.envs.AutoResetTransform` that will copy the observations that result from the call to `step` to the next `reset` and skip the calls to `reset` during rollouts (in both :meth:`~torchrl.envs.EnvBase.rollout` and :class:`~torchrl.collectors.SyncDataCollector` iterations). -This transform class also provides a fine-grained control over the behaviour to be adopted for the invalid observations, +This transform class also provides a fine-grained control over the behavior to be adopted for the invalid observations, which can be masked with `"nan"` or any other values, or not masked at all. 
To tell torchrl that an environment is auto-resetting, it is sufficient to provide an ``auto_reset`` argument @@ -755,10 +755,10 @@ registered buffers: >>> TransformedEnv(base_env, third_transform.clone()) # works On a single process or if the buffers are placed in shared memory, this will -result in all the clone transforms to keep the same behaviour even if the +result in all the clone transforms to keep the same behavior even if the buffers are changed in place (which is what will happen with the :class:`CatFrames` transform, for instance). In distributed settings, this may not hold and one -should be careful about the expected behaviour of the cloned transforms in this +should be careful about the expected behavior of the cloned transforms in this context. Finally, notice that indexing multiple transforms from a :class:`Compose` transform may also result in loss of parenthood for these transforms: the reason is that @@ -1061,7 +1061,7 @@ the current gym backend or any of its modules: Another tool that comes in handy with gym and other external dependencies is the :class:`torchrl._utils.implement_for` class. Decorating a function with ``@implement_for`` will tell torchrl that, depending on the version -indicated, a specific behaviour is to be expected. This allows us to easily +indicated, a specific behavior is to be expected. This allows us to easily support multiple versions of gym without requiring any effort from the user side. For example, considering that our virtual environment has the v0.26.2 installed, the following function will return ``1`` when queried: diff --git a/docs/source/reference/modules.rst b/docs/source/reference/modules.rst index 84603485f53..62cf1dedf35 100644 --- a/docs/source/reference/modules.rst +++ b/docs/source/reference/modules.rst @@ -62,7 +62,7 @@ Exploration wrappers To efficiently explore the environment, TorchRL proposes a series of wrappers that will override the action sampled by the policy by a noisier version. 
-Their behaviour is controlled by :func:`~torchrl.envs.utils.exploration_mode`: +Their behavior is controlled by :func:`~torchrl.envs.utils.exploration_mode`: if the exploration is set to ``"random"``, the exploration is active. In all other cases, the action written in the tensordict is simply the network output. diff --git a/examples/distributed/replay_buffers/distributed_replay_buffer.py b/examples/distributed/replay_buffers/distributed_replay_buffer.py index c7504fbf8ee..f25ea0bdc8b 100644 --- a/examples/distributed/replay_buffers/distributed_replay_buffer.py +++ b/examples/distributed/replay_buffers/distributed_replay_buffer.py @@ -150,8 +150,8 @@ def _create_and_launch_data_collectors(self) -> None: class ReplayBufferNode(RemoteTensorDictReplayBuffer): """Experience replay buffer node that is capable of accepting remote connections. Being a `RemoteTensorDictReplayBuffer` - means all of it's public methods are remotely invokable using `torch.rpc`. - Using a LazyMemmapStorage is highly advised in distributed settings with shared storage due to the lower serialisation + means all of its public methods are remotely invokable using `torch.rpc`. + Using a LazyMemmapStorage is highly advised in distributed settings with shared storage due to the lower serialization cost of MemoryMappedTensors as well as the ability to specify file storage locations which can improve ability to recover from node failures. 
Args: diff --git a/sota-implementations/redq/redq.py b/sota-implementations/redq/redq.py index eb802f6773d..865533aee2f 100644 --- a/sota-implementations/redq/redq.py +++ b/sota-implementations/redq/redq.py @@ -159,7 +159,7 @@ def main(cfg: "DictConfig"): # noqa: F821 use_env_creator=False, )() if isinstance(create_env_fn, ParallelEnv): - raise NotImplementedError("This behaviour is deprecated") + raise NotImplementedError("This behavior is deprecated") elif isinstance(create_env_fn, EnvCreator): recorder.transform[1:].load_state_dict( get_norm_state_dict(create_env_fn()), strict=False diff --git a/test/_utils_internal.py b/test/_utils_internal.py index 61b0c003f9d..e7417a1af8d 100644 --- a/test/_utils_internal.py +++ b/test/_utils_internal.py @@ -56,7 +56,7 @@ def HALFCHEETAH_VERSIONED(): def PONG_VERSIONED(): # load gym - # Gymnasium says that the ale_py behaviour changes from 1.0 + # Gymnasium says that the ale_py behavior changes from 1.0 # but with python 3.12 it is already the case with 0.29.1 try: import ale_py # noqa @@ -70,7 +70,7 @@ def PONG_VERSIONED(): def BREAKOUT_VERSIONED(): # load gym - # Gymnasium says that the ale_py behaviour changes from 1.0 + # Gymnasium says that the ale_py behavior changes from 1.0 # but with python 3.12 it is already the case with 0.29.1 try: import ale_py # noqa diff --git a/test/test_cost.py b/test/test_cost.py index 30ccb2e153b..2af5a88f9fa 100644 --- a/test/test_cost.py +++ b/test/test_cost.py @@ -149,7 +149,12 @@ # Capture all warnings -pytestmark = pytest.mark.filterwarnings("error") +pytestmark = [ + pytest.mark.filterwarnings("error"), + pytest.mark.filterwarnings( + "ignore:The current behavior of MLP when not providing `num_cells` is that the number" + ), +] class _check_td_steady: diff --git a/test/test_libs.py b/test/test_libs.py index a76cb610d69..1931533f28a 100644 --- a/test/test_libs.py +++ b/test/test_libs.py @@ -3682,7 +3682,7 @@ class TestRoboHive: # The other option would be not to use parametrize but 
that also # means less informative error trace stacks. # In the CI, robohive should not coexist with other libs so that's fine. - # Robohive logging behaviour can be controlled via ROBOHIVE_VERBOSITY=ALL/INFO/(WARN)/ERROR/ONCE/ALWAYS/SILENT + # Robohive logging behavior can be controlled via ROBOHIVE_VERBOSITY=ALL/INFO/(WARN)/ERROR/ONCE/ALWAYS/SILENT @pytest.mark.parametrize("from_pixels", [False, True]) @pytest.mark.parametrize("from_depths", [False, True]) @pytest.mark.parametrize("envname", RoboHiveEnv.available_envs) diff --git a/test/test_transforms.py b/test/test_transforms.py index 948e6db7f5c..f8c4a03b9a6 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -146,7 +146,7 @@ class TransformBase: We ask for every new transform tests to be coded following this minimum requirement class. - Of course, specific behaviours can also be tested separately. + Of course, specific behaviors can also be tested separately. If your transform identifies an issue with the EnvBase or _BatchedEnv abstraction(s), this needs to be corrected independently. diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index e7f2c94b1c2..4a10f4304f2 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -1412,7 +1412,7 @@ class _MultiDataCollector(DataCollectorBase): workers may charge the cpu load too much and harm performance. cat_results (str, int or None): (:class:`~torchrl.collectors.MultiSyncDataCollector` exclusively). If ``"stack"``, the data collected from the workers will be stacked along the - first dimension. This is the preferred behaviour as it is the most compatible + first dimension. This is the preferred behavior as it is the most compatible with the rest of the library. 
If ``0``, results will be concatenated along the first dimension of the outputs, which can be the batched dimension if the environments are @@ -2160,7 +2160,7 @@ def iterator(self) -> Iterator[TensorDictBase]: f"For MultiSyncDataCollector, `cat_results` indicates how the data should " f"be packed: the preferred option and current default is `cat_results='stack'` " f"which provides the best interoperability across torchrl components. " - f"Other accepted values are `cat_results=0` (previous behaviour) and " + f"Other accepted values are `cat_results=0` (previous behavior) and " f"`cat_results=-1` (cat along time dimension). Among these two, the latter " f"should be preferred for consistency across environment configurations. " f"Currently, the default value is `'stack'`." @@ -2948,7 +2948,7 @@ def _main_async_collector( # If policy is on cuda and env on cpu (or opposite) we put tensors that # are on cpu in shared mem. if collected_tensordict.device is not None: - # placehoder in case we need different behaviours + # placeholder in case we need different behaviors if collected_tensordict.device.type in ("cpu", "mps"): collected_tensordict.share_memory_() elif collected_tensordict.device.type == "cuda": diff --git a/torchrl/collectors/distributed/ray.py b/torchrl/collectors/distributed/ray.py index 79b3ee9063c..5552b3c60ee 100644 --- a/torchrl/collectors/distributed/ray.py +++ b/torchrl/collectors/distributed/ray.py @@ -99,7 +99,7 @@ class RayCollector(DataCollectorBase): The class dictionary input parameter "ray_init_config" can be used to provide the kwargs to call Ray initialization method ray.init(). If "ray_init_config" is not provided, the default - behaviour is to autodetect an existing Ray cluster or start a new Ray instance locally if no + behavior is to autodetect an existing Ray cluster or start a new Ray instance locally if no existing cluster is found. Refer to Ray documentation for advanced initialization kwargs. 
Similarly, dictionary input parameter "remote_configs" can be used to specify the kwargs for diff --git a/torchrl/data/datasets/openx.py b/torchrl/data/datasets/openx.py index 975384a3662..2dbf0720a37 100644 --- a/torchrl/data/datasets/openx.py +++ b/torchrl/data/datasets/openx.py @@ -77,7 +77,7 @@ class for more information on how to interact with non-tensor data shuffle=False will also impact the sampling. We advice users to create a copy of the dataset where the ``shuffle`` attribute of the sampler is set to ``False`` if they wish to enjoy the two different - behaviours (shuffled and not) within the same code base. + behaviors (shuffled and not) within the same code base. num_slices (int, optional): the number of slices in a batch. This corresponds to the number of trajectories present in a batch. @@ -134,7 +134,7 @@ class for more information on how to interact with non-tensor data the dataset. This isn't possible at a reasonable cost with `streaming=True`: in this case, trajectories will be sampled one at a time and delivered as such (with cropping to comply with - the batch-size etc). The behaviour of the two modalities is + the batch-size etc). The behavior of the two modalities is much more similar when `num_slices` and `slice_len` are specified, as in these cases, views of sub-episodes will be returned in both cases. diff --git a/torchrl/data/replay_buffers/replay_buffers.py b/torchrl/data/replay_buffers/replay_buffers.py index afa6f861079..2e0eeb80705 100644 --- a/torchrl/data/replay_buffers/replay_buffers.py +++ b/torchrl/data/replay_buffers/replay_buffers.py @@ -1286,7 +1286,7 @@ def sample( if include_info is not None: warnings.warn( "include_info is going to be deprecated soon." - "The default behaviour has changed to `include_info=True` " + "The default behavior has changed to `include_info=True` " "to avoid bugs linked to wrongly preassigned values in the " "output tensordict." ) @@ -1548,7 +1548,7 @@ class InPlaceSampler: .. 
warning:: This class is deprecated and will be removed in v0.7. - To be used cautiously as this may lead to unexpected behaviour (i.e. tensordicts + To be used cautiously as this may lead to unexpected behavior (i.e. tensordicts overwritten during execution). """ diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py index 8e9cf2d695b..8338fdff74b 100644 --- a/torchrl/data/replay_buffers/samplers.py +++ b/torchrl/data/replay_buffers/samplers.py @@ -67,7 +67,7 @@ def update_priority( storage: Storage | None = None, ) -> dict | None: warnings.warn( - f"Calling update_priority() on a sampler {type(self).__name__} that is not prioritized. Make sure this is the indented behaviour." + f"Calling update_priority() on a sampler {type(self).__name__} that is not prioritized. Make sure this is the intended behavior." ) return diff --git a/torchrl/data/replay_buffers/storages.py b/torchrl/data/replay_buffers/storages.py index acdf2dcf8dd..e49ab509a01 100644 --- a/torchrl/data/replay_buffers/storages.py +++ b/torchrl/data/replay_buffers/storages.py @@ -739,7 +739,7 @@ def set( # noqa: F811 "A cursor of length superior to the storage capacity was provided. " "To accommodate for this, the cursor will be truncated to its last " "element such that its length matched the length of the storage. " - "This may **not** be the optimal behaviour for your application! " + "This may **not** be the optimal behavior for your application! " "Make sure that the storage capacity is big enough to support the " "batch size provided." 
) diff --git a/torchrl/data/replay_buffers/utils.py b/torchrl/data/replay_buffers/utils.py index 3a4141fd218..15a90f1a8f5 100644 --- a/torchrl/data/replay_buffers/utils.py +++ b/torchrl/data/replay_buffers/utils.py @@ -802,7 +802,7 @@ def _path2str(path, default_name=None): if result == default_name: raise RuntimeError( "A tensor had the same identifier as the default name used when the buffer contains " - f"a single tensor (name={default_name}). This behaviour is not allowed. Please rename your " + f"a single tensor (name={default_name}). This behavior is not allowed. Please rename your " f"tensor in the map/dict or set a new default name with the environment variable SINGLE_TENSOR_BUFFER_NAME." ) return result diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py index a81fa3891ad..9bbd068b434 100644 --- a/torchrl/data/tensor_specs.py +++ b/torchrl/data/tensor_specs.py @@ -524,7 +524,7 @@ class TensorSpec: """Parent class of the tensor meta-data containers. TorchRL's TensorSpec are used to present what input/output is to be expected for a specific class, - or sometimes to simulate simple behaviours by generating random data within a defined space. + or sometimes to simulate simple behaviors by generating random data within a defined space. TensorSpecs are primarily used in environments to specify their input/output structure without needing to execute the environment (or starting it). 
They can also be used to instantiate shared buffers to pass @@ -5316,7 +5316,7 @@ def _unsqueezed_shape(shape: torch.Size, dim: int) -> torch.Size: class _CompositeSpecItemsView: - """Wrapper class that enables richer behaviour of `items` for Composite.""" + """Wrapper class that enables richer behavior of `items` for Composite.""" def __init__( self, diff --git a/torchrl/envs/gym_like.py b/torchrl/envs/gym_like.py index 82f42180913..9092d419075 100644 --- a/torchrl/envs/gym_like.py +++ b/torchrl/envs/gym_like.py @@ -149,7 +149,7 @@ def info_spec(self) -> Dict[str, TensorSpec]: class GymLikeEnv(_EnvWrapper): """A gym-like env is an environment. - Its behaviour is similar to gym environments in what common methods (specifically reset and step) are expected to do. + Its behavior is similar to gym environments in what common methods (specifically reset and step) are expected to do. A :obj:`GymLikeEnv` has a :obj:`.step()` method with the following signature: @@ -508,7 +508,7 @@ def auto_register_info_dict( the info is filled at reset time. .. note:: This method requires running a few iterations in the environment to - manually check that the behaviour matches expectations. + manually check that the behavior matches expectations. Args: ignore_private (bool, optional): If ``True``, private infos (starting with diff --git a/torchrl/envs/libs/pettingzoo.py b/torchrl/envs/libs/pettingzoo.py index e34ca4600a7..b147a005173 100644 --- a/torchrl/envs/libs/pettingzoo.py +++ b/torchrl/envs/libs/pettingzoo.py @@ -136,7 +136,7 @@ class PettingZooWrapper(_EnvWrapper): For example, you can provide ``MarlGroupMapType.ONE_GROUP_PER_AGENT``, telling that each agent should have its own tensordict (similar to the pettingzoo parallel API). - Grouping is useful for leveraging vectorisation among agents whose data goes through the same + Grouping is useful for leveraging vectorization among agents whose data goes through the same neural network. 
Args: @@ -897,7 +897,7 @@ class PettingZooEnv(PettingZooWrapper): For example, you can provide ``MarlGroupMapType.ONE_GROUP_PER_AGENT``, telling that each agent should have its own tensordict (similar to the pettingzoo parallel API). - Grouping is useful for leveraging vectorisation among agents whose data goes through the same + Grouping is useful for leveraging vectorization among agents whose data goes through the same neural network. Args: diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py index 8859af2f9cd..2e2883c33bf 100644 --- a/torchrl/envs/transforms/transforms.py +++ b/torchrl/envs/transforms/transforms.py @@ -599,7 +599,7 @@ def __init__( device = env.device super().__init__(device=None, allow_done_after_reset=None, **kwargs) - # Type matching must be exact here, because subtyping could introduce differences in behaviour that must + # Type matching must be exact here, because subtyping could introduce differences in behavior that must # be contained within the subclass. if type(env) is TransformedEnv and type(self) is TransformedEnv: self._set_env(env.base_env, device) @@ -1507,7 +1507,7 @@ class TargetReturn(Transform): In goal-conditioned RL, the :class:`~.TargetReturn` is defined as the expected cumulative reward obtained from the current state to the goal state - or the end of the episode. It is used as input for the policy to guide its behaviour. + or the end of the episode. It is used as input for the policy to guide its behavior. For a trained policy typically the maximum return in the environment is chosen as the target return. However, as it is used as input to the policy module, it should be scaled @@ -2505,7 +2505,7 @@ class ObservationNorm(ObservationTransform): loc (number or tensor): location of the affine transform scale (number or tensor): scale of the affine transform in_keys (sequence of NestedKey, optional): entries to be normalized. Defaults to ["observation", "pixels"]. 
- All entries will be normalized with the same values: if a different behaviour is desired + All entries will be normalized with the same values: if a different behavior is desired (e.g. a different normalization for pixels and states) different :obj:`ObservationNorm` objects should be used. out_keys (sequence of NestedKey, optional): output entries. Defaults to the value of `in_keys`. @@ -2569,7 +2569,7 @@ def __init__( ): if in_keys is None: raise RuntimeError( - "Not passing in_keys to ObservationNorm is a deprecated behaviour." + "Not passing in_keys to ObservationNorm is a deprecated behavior." ) if out_keys is None: @@ -3361,7 +3361,7 @@ class DTypeCastTransform(Transform): """Casts one dtype to another for selected keys. Depending on whether the ``in_keys`` or ``in_keys_inv`` are provided - during construction, the class behaviour will change: + during construction, the class behavior will change: * If the keys are provided, those entries and those entries only will be transformed from ``dtype_in`` to ``dtype_out`` entries; @@ -3417,7 +3417,7 @@ class DTypeCastTransform(Transform): >>> print(td.get("not_transformed").dtype) torch.float32 - The same behaviour is the rule when environments are constructedw without + The same behavior is the rule when environments are constructed without specifying the transform keys: Examples: @@ -3733,7 +3733,7 @@ class DoubleToFloat(DTypeCastTransform): """Casts one dtype to another for selected keys. 
Depending on whether the ``in_keys`` or ``in_keys_inv`` are provided - during construction, the class behaviour will change: + during construction, the class behavior will change: * If the keys are provided, those entries and those entries only will be transformed from ``float64`` to ``float32`` entries; @@ -3787,7 +3787,7 @@ class DoubleToFloat(DTypeCastTransform): >>> print(td.get("not_transformed").dtype) torch.float32 - The same behaviour is the rule when environments are constructedw without + The same behavior is the rule when environments are constructed without specifying the transform keys: Examples: @@ -4090,7 +4090,7 @@ class CatTensors(Transform): Args: in_keys (sequence of NestedKey): keys to be concatenated. If `None` (or not provided) the keys will be retrieved from the parent environment the first time - the transform is used. This behaviour will only work if a parent is set. + the transform is used. This behavior will only work if a parent is set. out_key (NestedKey): key of the resulting tensor. dim (int, optional): dimension along which the concatenation will occur. Default is ``-1``. @@ -4454,7 +4454,7 @@ def _reset( ) # Merge the two tensordicts tensordict = parent._reset_proc_data(tensordict.clone(False), tensordict_reset) - # check that there is a single done state -- behaviour is undefined for multiple dones + # check that there is a single done state -- behavior is undefined for multiple dones done_keys = parent.done_keys reward_key = parent.reward_key if parent.batch_size.numel() > 1: @@ -6373,7 +6373,7 @@ class RandomCropTensorDict(Transform): This transform is primarily designed to be used with replay buffers and modules. Currently, it cannot be used as an environment transform. - Do not hesitate to request for this behaviour through an issue if this is + Do not hesitate to request for this behavior through an issue if this is desired. 
Args: @@ -6401,7 +6401,7 @@ def __init__( if sample_dim > 0: warnings.warn( "A positive shape has been passed to the RandomCropTensorDict " - "constructor. This may have unexpected behaviours when the " + "constructor. This may have unexpected behaviors when the " "passed tensordicts have inconsistent batch dimensions. " "For context, by convention, TorchRL concatenates time steps " "along the last dimension of the tensordict." diff --git a/torchrl/envs/utils.py b/torchrl/envs/utils.py index b723bd7b882..0c252c3db3f 100644 --- a/torchrl/envs/utils.py +++ b/torchrl/envs/utils.py @@ -360,7 +360,7 @@ def step_mdp( Given a tensordict retrieved after a step, returns the :obj:`"next"` indexed-tensordict. The arguments allow for a precise control over what should be kept and what - should be copied from the ``"next"`` entry. The default behaviour is: + should be copied from the ``"next"`` entry. The default behavior is: move the observation entries, reward and done states to the root, exclude the current action and keep all extra keys (non-action, non-done, non-reward). @@ -1503,7 +1503,7 @@ def _make_compatible_policy(policy, observation_spec, env=None, fast_wrap=False) If you want TorchRL to automatically wrap your policy with a TensorDictModule then the arguments to policy.forward must correspond one-to-one with entries in env.observation_spec. - For more complex behaviour and more control you can consider writing your + For more complex behavior and more control you can consider writing your own TensorDictModule. Check the collector documentation to know more about accepted policies. 
""" @@ -1541,7 +1541,7 @@ def _policy_is_tensordict_compatible(policy: nn.Module): # if in_keys or out_keys were defined but policy is not a TensorDictModule or # accepts multiple arguments then it's likely the user is trying to do something - # that will have undetermined behaviour, we raise an error + # that will have undetermined behavior, we raise an error raise TypeError( "Received a policy that defines in_keys or out_keys and also expects multiple " "arguments to policy.forward. If the policy is compatible with TensorDict, it " diff --git a/torchrl/modules/distributions/continuous.py b/torchrl/modules/distributions/continuous.py index fddc2f3415d..944e51f0b9e 100644 --- a/torchrl/modules/distributions/continuous.py +++ b/torchrl/modules/distributions/continuous.py @@ -39,7 +39,7 @@ class IndependentNormal(D.Independent): .. math:: loc = tanh(loc / upscale) * upscale. - This behaviour can be disabled by switching off the tanh_loc parameter (see below). + This behavior can be disabled by switching off the tanh_loc parameter (see below). Args: @@ -173,7 +173,7 @@ class TruncatedNormal(D.Independent): .. math:: loc = tanh(loc / upscale) * upscale. - This behaviour can be disabled by switching off the tanh_loc parameter (see below). + This behavior can be disabled by switching off the tanh_loc parameter (see below). Args: diff --git a/torchrl/modules/models/batchrenorm.py b/torchrl/modules/models/batchrenorm.py index 26a2f9d50d2..41de0945f70 100644 --- a/torchrl/modules/models/batchrenorm.py +++ b/torchrl/modules/models/batchrenorm.py @@ -32,9 +32,9 @@ class BatchRenorm1d(nn.Module): Defaults to ``5.0``. warmup_steps (int, optional): Number of warm-up steps for the running mean and variance. Defaults to ``10000``. - smooth (bool, optional): if ``True``, the behaviour smoothly transitions from regular + smooth (bool, optional): if ``True``, the behavior smoothly transitions from regular batch-norm (when ``iter=0``) to batch-renorm (when ``iter=warmup_steps``). 
- Otherwise, the behaviour will transition from batch-norm to batch-renorm when + Otherwise, the behavior will transition from batch-norm to batch-renorm when ``iter=warmup_steps``. Defaults to ``False``. """ diff --git a/torchrl/modules/models/exploration.py b/torchrl/modules/models/exploration.py index 2ec51b46559..16c6ac5ff30 100644 --- a/torchrl/modules/models/exploration.py +++ b/torchrl/modules/models/exploration.py @@ -359,7 +359,7 @@ def sigma(self): def forward(self, mu, state, _eps_gSDE): sigma = self.sigma.clamp_max(self.scale_max) - _err_explo = f"gSDE behaviour for exploration mode {exploration_type()} is not defined. Choose from 'random' or 'mode'." + _err_explo = f"gSDE behavior for exploration mode {exploration_type()} is not defined. Choose from 'random' or 'mode'." if state.shape[:-1] != mu.shape[:-1]: _err_msg = f"mu and state are expected to have matching batch size, got shapes {mu.shape} and {state.shape}" diff --git a/torchrl/modules/models/models.py b/torchrl/modules/models/models.py index 23c229c6524..3faaa396299 100644 --- a/torchrl/modules/models/models.py +++ b/torchrl/modules/models/models.py @@ -5,6 +5,7 @@ from __future__ import annotations import dataclasses +import warnings from copy import deepcopy from numbers import Number @@ -179,8 +180,15 @@ def __init__( if out_features is None: raise ValueError("out_features must be specified for MLP.") - default_num_cells = 32 if num_cells is None: + warnings.warn( + "The current behavior of MLP when not providing `num_cells` is that the number of cells is " + "set to [default_num_cells] * depth, where `depth=3` by default and `default_num_cells=32`. " + "From v0.7, this behavior will switch and `depth=0` will be used. 
" + "To silence this message, indicate what number of cells you desire.", + category=DeprecationWarning, + ) + default_num_cells = 32 if depth is None: num_cells = [default_num_cells] * 3 depth = 3 diff --git a/torchrl/modules/tensordict_module/common.py b/torchrl/modules/tensordict_module/common.py index c9853c378e7..4018589bfa1 100644 --- a/torchrl/modules/tensordict_module/common.py +++ b/torchrl/modules/tensordict_module/common.py @@ -350,7 +350,7 @@ def is_tensordict_compatible(module: Union[TensorDictModule, nn.Module]): # if in_keys or out_keys were defined but module is not a TensorDictModule or # accepts multiple arguments then it's likely the user is trying to do something - # that will have undetermined behaviour, we raise an error + # that will have undetermined behavior, we raise an error raise TypeError( "Received a module that defines in_keys or out_keys and also expects multiple " "arguments to module.forward. If the module is compatible with TensorDict, it " @@ -403,7 +403,7 @@ def ensure_tensordict_compatible( "env.observation_spec. If you want TorchRL to automatically " "wrap your module with a TensorDictModule then the arguments " "to module must correspond one-to-one with entries in " - "in_keys. For more complex behaviour and more control you can " + "in_keys. For more complex behavior and more control you can " "consider writing your own TensorDictModule." ) diff --git a/torchrl/modules/tensordict_module/exploration.py b/torchrl/modules/tensordict_module/exploration.py index 3b19b60048a..7337d1c94dd 100644 --- a/torchrl/modules/tensordict_module/exploration.py +++ b/torchrl/modules/tensordict_module/exploration.py @@ -707,7 +707,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: f"The tensordict passed to {self.__class__.__name__} appears to be " f"missing the '{self.is_init_key}' entry. 
This entry is used to " f"reset the noise at the beginning of a trajectory, without it " - f"the behaviour of this exploration method is undefined. " + f"the behavior of this exploration method is undefined. " f"This is allowed for BC compatibility purposes but it will be deprecated soon! " f"To create a '{self.is_init_key}' entry, simply append an torchrl.envs.InitTracker " f"transform to your environment with `env = TransformedEnv(env, InitTracker())`." @@ -900,7 +900,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: f"The tensordict passed to {self.__class__.__name__} appears to be " f"missing the '{self.is_init_key}' entry. This entry is used to " f"reset the noise at the beginning of a trajectory, without it " - f"the behaviour of this exploration method is undefined. " + f"the behavior of this exploration method is undefined. " f"This is allowed for BC compatibility purposes but it will be deprecated soon! " f"To create a '{self.is_init_key}' entry, simply append an torchrl.envs.InitTracker " f"transform to your environment with `env = TransformedEnv(env, InitTracker())`." diff --git a/torchrl/modules/tensordict_module/rnn.py b/torchrl/modules/tensordict_module/rnn.py index 6fefda2dd5d..48756683c11 100644 --- a/torchrl/modules/tensordict_module/rnn.py +++ b/torchrl/modules/tensordict_module/rnn.py @@ -529,7 +529,7 @@ def make_tensordict_primer(self): inputs and outputs (recurrent states) during rollout execution. That way, the data can be shared across processes and dealt with properly. 
- Not including a ``TensorDictPrimer`` in the environment may result in poorly defined behaviours, for instance + Not including a ``TensorDictPrimer`` in the environment may result in poorly defined behaviors, for instance in parallel settings where a step involves copying the new recurrent state from ``"next"`` to the root tensordict, which the meth:`~torchrl.EnvBase.step_mdp` method will not be able to do as the recurrent states are not registered within the environment specs. @@ -605,7 +605,7 @@ def temporal_mode(self): def set_recurrent_mode(self, mode: bool = True): """Returns a new copy of the module that shares the same lstm model but with a different ``recurrent_mode`` attribute (if it differs). - A copy is created such that the module can be used with divergent behaviour + A copy is created such that the module can be used with divergent behavior in various parts of the code (inference vs training): Examples: @@ -619,7 +619,7 @@ def set_recurrent_mode(self, mode: bool = True): >>> lstm = nn.LSTM(input_size=env.observation_spec["observation"].shape[-1], hidden_size=64, batch_first=True) >>> lstm_module = LSTMModule(lstm=lstm, in_keys=["observation", "hidden0", "hidden1"], out_keys=["intermediate", ("next", "hidden0"), ("next", "hidden1")]) >>> mlp = MLP(num_cells=[64], out_features=1) - >>> # building two policies with different behaviours: + >>> # building two policies with different behaviors: >>> policy_inference = Seq(lstm_module, Mod(mlp, in_keys=["intermediate"], out_keys=["action"])) >>> policy_training = Seq(lstm_module.set_recurrent_mode(True), Mod(mlp, in_keys=["intermediate"], out_keys=["action"])) >>> traj_td = env.rollout(3) # some random temporal data @@ -1275,7 +1275,7 @@ def make_tensordict_primer(self): inputs and outputs (recurrent states) during rollout execution. That way, the data can be shared across processes and dealt with properly. 
- Not including a ``TensorDictPrimer`` in the environment may result in poorly defined behaviours, for instance + Not including a ``TensorDictPrimer`` in the environment may result in poorly defined behaviors, for instance in parallel settings where a step involves copying the new recurrent state from ``"next"`` to the root tensordict, which the meth:`~torchrl.EnvBase.step_mdp` method will not be able to do as the recurrent states are not registered within the environment specs. @@ -1348,7 +1348,7 @@ def temporal_mode(self): def set_recurrent_mode(self, mode: bool = True): """Returns a new copy of the module that shares the same gru model but with a different ``recurrent_mode`` attribute (if it differs). - A copy is created such that the module can be used with divergent behaviour + A copy is created such that the module can be used with divergent behavior in various parts of the code (inference vs training): Examples: @@ -1361,7 +1361,7 @@ def set_recurrent_mode(self, mode: bool = True): >>> gru = nn.GRU(input_size=env.observation_spec["observation"].shape[-1], hidden_size=64, batch_first=True) >>> gru_module = GRUModule(gru=gru, in_keys=["observation", "hidden"], out_keys=["intermediate", ("next", "hidden")]) >>> mlp = MLP(num_cells=[64], out_features=1) - >>> # building two policies with different behaviours: + >>> # building two policies with different behaviors: >>> policy_inference = Seq(gru_module, Mod(mlp, in_keys=["intermediate"], out_keys=["action"])) >>> policy_training = Seq(gru_module.set_recurrent_mode(True), Mod(mlp, in_keys=["intermediate"], out_keys=["action"])) >>> traj_td = env.rollout(3) # some random temporal data diff --git a/torchrl/objectives/cql.py b/torchrl/objectives/cql.py index 6a6cf8548e4..f7582fb5892 100644 --- a/torchrl/objectives/cql.py +++ b/torchrl/objectives/cql.py @@ -1089,7 +1089,7 @@ def __init__( if action_space is None: warnings.warn( "action_space was not specified. DiscreteCQLLoss will default to 'one-hot'. 
" - "This behaviour will be deprecated soon and a space will have to be passed. " + "This behavior will be deprecated soon and a space will have to be passed. " "Check the DiscreteCQLLoss documentation to see how to pass the action space." ) action_space = "one-hot" diff --git a/torchrl/objectives/deprecated.py b/torchrl/objectives/deprecated.py index 4f805c1b411..32394942600 100644 --- a/torchrl/objectives/deprecated.py +++ b/torchrl/objectives/deprecated.py @@ -465,7 +465,7 @@ def make_value_estimator(self, value_type: ValueEstimators = None, **hyperparams class DoubleREDQLoss_deprecated(REDQLoss_deprecated): - """[Deprecated] Class for delayed target-REDQ (which should be the default behaviour).""" + """[Deprecated] Class for delayed target-REDQ (which should be the default behavior).""" delay_qvalue: bool = True diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index 1f3ec714f53..a9d50cadd50 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -224,7 +224,7 @@ def __init__( if action_space is None: warnings.warn( "action_space was not specified. DQNLoss will default to 'one-hot'." - "This behaviour will be deprecated soon and a space will have to be passed." + "This behavior will be deprecated soon and a space will have to be passed." "Check the DQNLoss documentation to see how to pass the action space. " ) action_space = "one-hot" diff --git a/torchrl/objectives/functional.py b/torchrl/objectives/functional.py index 7c598676794..fd96b2e92a3 100644 --- a/torchrl/objectives/functional.py +++ b/torchrl/objectives/functional.py @@ -20,7 +20,7 @@ def cross_entropy_loss( (integer representation) or log_policy.shape (one-hot). inplace: fills log_policy in-place with 0.0 at non-selected actions before summing along the last dimensions. This is usually faster but it will change the value of log-policy in place, which may lead to unwanted - behaviours. + behaviors. 
""" if action.shape == log_policy.shape: diff --git a/torchrl/objectives/iql.py b/torchrl/objectives/iql.py index 04d7e020551..a4e241347e2 100644 --- a/torchrl/objectives/iql.py +++ b/torchrl/objectives/iql.py @@ -774,7 +774,7 @@ def __init__( if action_space is None: warnings.warn( "action_space was not specified. DiscreteIQLLoss will default to 'one-hot'." - "This behaviour will be deprecated soon and a space will have to be passed." + "This behavior will be deprecated soon and a space will have to be passed." "Check the DiscreteIQLLoss documentation to see how to pass the action space. " ) action_space = "one-hot" diff --git a/torchrl/objectives/multiagent/qmixer.py b/torchrl/objectives/multiagent/qmixer.py index ce4cc8ddbb8..39777c59e26 100644 --- a/torchrl/objectives/multiagent/qmixer.py +++ b/torchrl/objectives/multiagent/qmixer.py @@ -254,7 +254,7 @@ def __init__( if action_space is None: warnings.warn( "action_space was not specified. QMixerLoss will default to 'one-hot'." - "This behaviour will be deprecated soon and a space will have to be passed." + "This behavior will be deprecated soon and a space will have to be passed." "Check the QMixerLoss documentation to see how to pass the action space. " ) action_space = "one-hot" diff --git a/torchrl/objectives/ppo.py b/torchrl/objectives/ppo.py index d79f0b2ea84..b10ed5df98a 100644 --- a/torchrl/objectives/ppo.py +++ b/torchrl/objectives/ppo.py @@ -45,15 +45,15 @@ class PPOLoss(LossModule): """A parent PPO loss class. - PPO (Proximal Policy Optimisation) is a model-free, online RL algorithm + PPO (Proximal Policy Optimization) is a model-free, online RL algorithm that makes use of a recorded (batch of) trajectories to perform several optimization steps, while actively preventing the updated policy to deviate too much from its original parameter configuration. - PPO loss can be found in different flavours, depending on the way the - constrained optimisation is implemented: ClipPPOLoss and KLPENPPOLoss. 
- Unlike its subclasses, this class does not implement any regularisation + PPO loss can be found in different flavors, depending on the way the + constrained optimization is implemented: ClipPPOLoss and KLPENPPOLoss. + Unlike its subclasses, this class does not implement any regularization and should therefore be used cautiously. For more details regarding PPO, refer to: "Proximal Policy Optimization Algorithms", diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index 6e57a927f37..bd21e33c30d 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -1088,7 +1088,7 @@ def __init__( if action_space is None: warnings.warn( "action_space was not specified. DiscreteSACLoss will default to 'one-hot'." - "This behaviour will be deprecated soon and a space will have to be passed. " + "This behavior will be deprecated soon and a space will have to be passed. " "Check the DiscreteSACLoss documentation to see how to pass the action space. " ) action_space = "one-hot" diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index b1077198784..3031763c50f 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -301,7 +301,7 @@ def __init__( ): if eps is None and tau is None: raise RuntimeError( - "Neither eps nor tau was provided. This behaviour is deprecated.", + "Neither eps nor tau was provided. This behavior is deprecated.", ) eps = 0.999 if (eps is None) ^ (tau is None): diff --git a/torchrl/record/recorder.py b/torchrl/record/recorder.py index 73e3b5bdaab..e533f9e9df9 100644 --- a/torchrl/record/recorder.py +++ b/torchrl/record/recorder.py @@ -409,9 +409,9 @@ class PixelRenderTransform(Transform): >>> env.transform[-1].dump() The transform can be disabled using the :meth:`~torchrl.record.PixelRenderTransform.switch` method, which will - turn the rendering on if it's off or off if it's on (an argument can also be passed to control this behaviour). 
+ turn the rendering on if it's off or off if it's on (an argument can also be passed to control this behavior). Since transforms are :class:`~torch.nn.Module` instances, :meth:`~torch.nn.Module.apply` can be used to control - this behaviour: + this behavior: >>> def switch(module): ... if isinstance(module, PixelRenderTransform): diff --git a/torchrl/trainers/helpers/models.py b/torchrl/trainers/helpers/models.py index 4bae738101d..a3776f78e5a 100644 --- a/torchrl/trainers/helpers/models.py +++ b/torchrl/trainers/helpers/models.py @@ -151,7 +151,7 @@ def make_dqn_actor( if isinstance(action_spec, Categorical): # if action spec is modeled as categorical variable, we still need to have features equal - # to the number of possible choices and also set categorical behavioural for actors. + # to the number of possible choices and also set categorical behavioral for actors. actor_kwargs.update({"action_space": "categorical"}) out_features = env_specs["input_spec", "full_action_spec", "action"].space.n else: diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 247d039eb1e..62ea4a4a109 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -1126,7 +1126,7 @@ class Recorder(TrainerHookBase): """Recorder hook for :class:`~torchrl.trainers.Trainer`. Args: - record_interval (int): total number of optimisation steps + record_interval (int): total number of optimization steps between two calls to the recorder for testing. record_frames (int): number of frames to be recorded during testing. @@ -1145,7 +1145,7 @@ class Recorder(TrainerHookBase): Given that this instance is supposed to both explore and render the performance of the policy, it should be possible to turn off - the explorative behaviour by calling the + the explorative behavior by calling the `set_exploration_type(ExplorationType.DETERMINISTIC)` context manager. environment (EnvBase): An environment instance to be used for testing. 
diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 2da1967e5ad..59188ad21f6 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -449,7 +449,7 @@ def get_collector( policy=actor_explore, frames_per_batch=frames_per_batch, total_frames=total_frames, - # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode + # this is the default behavior: the collector runs in ``"random"`` (or explorative) mode exploration_type=ExplorationType.RANDOM, # We set the all the devices to be identical. Below is an example of # heterogeneous devices diff --git a/tutorials/sphinx-tutorials/coding_ppo.py b/tutorials/sphinx-tutorials/coding_ppo.py index 51229e1880d..d1b094161f1 100644 --- a/tutorials/sphinx-tutorials/coding_ppo.py +++ b/tutorials/sphinx-tutorials/coding_ppo.py @@ -195,7 +195,7 @@ # ~~~~~~~~~~~~~~ # # At each data collection (or batch collection) we will run the optimization -# over a certain number of *epochs*, each time consuming the entire data we just +# over a certain number of *epochs*, each time consuming the entire data we just # acquired in a nested training loop. Here, the ``sub_batch_size`` is different from the # ``frames_per_batch`` here above: recall that we are working with a "batch of data" # coming from our collector, which size is defined by ``frames_per_batch``, and that @@ -203,7 +203,7 @@ # The size of these sub-batches is controlled by ``sub_batch_size``. # sub_batch_size = 64 # cardinality of the sub-samples gathered from the current data in the inner loop -num_epochs = 10 # optimisation steps per batch of data collected +num_epochs = 10 # optimization steps per batch of data collected clip_epsilon = ( 0.2 # clip value for PPO loss: see the equation in the intro for more context. 
) diff --git a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py index fc1a22d50cf..08b6d83bf5c 100644 --- a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py +++ b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py @@ -89,7 +89,7 @@ # wrapper for either PettingZoo or VMAS. # # 3. Following that, we will formulate the policy and critic networks, discussing the effects of various choices on -# parameter sharing and critic centralisation. +# parameter sharing and critic centralization. # # 4. Afterwards, we will create the sampling collector and the replay buffer. # @@ -179,7 +179,7 @@ memory_size = 1_000_000 # The replay buffer of each group can store this many frames # Training -n_optimiser_steps = 100 # Number of optimisation steps per training iteration +n_optimiser_steps = 100 # Number of optimization steps per training iteration train_batch_size = 128 # Number of frames trained in each optimiser step lr = 3e-4 # Learning rate max_grad_norm = 1.0 # Maximum norm for the gradients @@ -193,7 +193,7 @@ # ----------- # # Multi-agent environments simulate multiple agents interacting with the world. -# TorchRL API allows integrating various types of multi-agent environment flavours. +# TorchRL API allows integrating various types of multi-agent environment flavors. # In this tutorial we will focus on environments where multiple agent groups interact in parallel. # That is: at every step all agents will get an observation and take an action synchronously. # @@ -310,7 +310,7 @@ # Looking at the ``done_spec``, we can see that there are some keys that are outside of agent groups # (``"done", "terminated", "truncated"``), which do not have a leading multi-agent dimension. # These keys are shared by all agents and represent the environment global done state used for resetting. 
-# By default, like in this case, parallel PettingZoo environments are done when any agent is done, but this behaviour +# By default, like in this case, parallel PettingZoo environments are done when any agent is done, but this behavior # can be overridden by setting ``done_on_any`` at PettingZoo environment construction. # # To quickly access the keys for each of these values in tensordicts, we can simply ask the environment for the @@ -415,7 +415,7 @@ # Another important decision we need to make is whether we want the agents within a team to **share the policy parameters**. # On the one hand, sharing parameters means that they will all share the same policy, which will allow them to benefit from # each other's experiences. This will also result in faster training. -# On the other hand, it will make them behaviourally *homogenous*, as they will in fact share the same model. +# On the other hand, it will make them behaviorally *homogenous*, as they will in fact share the same model. # For this example, we will enable sharing as we do not mind the homogeneity and can benefit from the computational # speed, but it is important to always think about this decision in your own problems! # @@ -424,7 +424,7 @@ # **First**: define a neural network ``n_obs_per_agent`` -> ``n_actions_per_agents`` # # For this we use the ``MultiAgentMLP``, a TorchRL module made exactly for -# multiple agents, with much customisation available. +# multiple agents, with much customization available. # # We will define a different policy for each group and store them in a dictionary. # diff --git a/tutorials/sphinx-tutorials/multiagent_ppo.py b/tutorials/sphinx-tutorials/multiagent_ppo.py index d7d906a4fb0..ec24de6cddd 100644 --- a/tutorials/sphinx-tutorials/multiagent_ppo.py +++ b/tutorials/sphinx-tutorials/multiagent_ppo.py @@ -99,7 +99,7 @@ # wrapper for the VMAS simulator. # # 3. 
Next, we will design the policy and the critic networks, discussing the impact of the various choices on -# parameter sharing and critic centralisation. +# parameter sharing and critic centralization. # # 4. Next, we will create the sampling collector and the replay buffer. # @@ -184,7 +184,7 @@ # ----------- # # Multi-agent environments simulate multiple agents interacting with the world. -# TorchRL API allows integrating various types of multi-agent environment flavours. +# TorchRL API allows integrating various types of multi-agent environment flavors. # Some examples include environments with shared or individual agent rewards, done flags, and observations. # For more information on how the multi-agent environments API works in TorchRL, you can check out the dedicated # :ref:`doc section `. @@ -195,7 +195,7 @@ # This means that all its state and physics # are PyTorch tensors with a first dimension representing the number of parallel environments in a batch. # This allows leveraging the Single Instruction Multiple Data (SIMD) paradigm of GPUs and significantly -# speed up parallel computation by leveraging parallelisation in GPU warps. It also means +# speed up parallel computation by leveraging parallelization in GPU warps. It also means # that, when using it in TorchRL, both simulation and training can be run on-device, without ever passing # data to the CPU. # @@ -207,7 +207,7 @@ # avoid colliding into each other. # Agents act in a 2D continuous world with drag and elastic collisions. # Their actions are 2D continuous forces which determine their acceleration. -# The reward is composed of three terms: a collision penalisation, a reward based on the distance to the goal, and a +# The reward is composed of three terms: a collision penalization, a reward based on the distance to the goal, and a # final shared reward given when all agents reach their goal. 
# The distance-based term is computed as the difference in the relative distance # between an agent and its goal over two consecutive timesteps. @@ -391,7 +391,7 @@ # **First**: define a neural network ``n_obs_per_agent`` -> ``2 * n_actions_per_agents`` # # For this we use the ``MultiAgentMLP``, a TorchRL module made exactly for -# multiple agents, with much customisation available. +# multiple agents, with much customization available. # share_parameters_policy = True diff --git a/tutorials/sphinx-tutorials/rb_tutorial.py b/tutorials/sphinx-tutorials/rb_tutorial.py index fc3a3ae954c..f189888b804 100644 --- a/tutorials/sphinx-tutorials/rb_tutorial.py +++ b/tutorials/sphinx-tutorials/rb_tutorial.py @@ -133,7 +133,7 @@ # basic properties (such as shape and dtype) as the first batch of data that # was used to instantiate the buffer. # Passing data that does not match this requirement will either raise an -# exception or lead to some undefined behaviours. +# exception or lead to some undefined behaviors. # - The :class:`~torchrl.data.LazyMemmapStorage` works as the # :class:`~torchrl.data.LazyTensorStorage` in that it is lazy (i.e., it # expects the first batch of data to be instantiated), and it requires data diff --git a/tutorials/sphinx-tutorials/torchrl_demo.py b/tutorials/sphinx-tutorials/torchrl_demo.py index 6cec838fdc2..1244c465156 100644 --- a/tutorials/sphinx-tutorials/torchrl_demo.py +++ b/tutorials/sphinx-tutorials/torchrl_demo.py @@ -170,7 +170,7 @@ # * a collection of algorithms: we do not intend to provide SOTA implementations of RL algorithms, # but we provide these algorithms only as examples of how to use the library. # -# * a research framework: modularity in TorchRL comes in two flavours. First, we try +# * a research framework: modularity in TorchRL comes in two flavors. First, we try # to build re-usable components, such that they can be easily swapped with each other. 
# Second, we make our best such that components can be used independently of the rest # of the library.