From 62a6bdb62803258da052894f37f90e1be394c532 Mon Sep 17 00:00:00 2001 From: pdmurray Date: Mon, 4 Dec 2023 14:18:56 -0800 Subject: [PATCH 1/7] [Doc] Fix the linkcheck CI job Signed-off-by: pdmurray --- .buildkite/build.rayci.yml | 10 +- .buildkite/pipeline.test.yml | 7 -- doc/Makefile | 12 ++- .../cluster/kubernetes/user-guides/config.md | 2 +- doc/source/conf.py | 8 +- doc/source/ray-contribute/docs.ipynb | 10 +- doc/source/ray-overview/examples.rst | 60 +---------- doc/source/ray-overview/ray-libraries.rst | 15 +-- doc/source/rllib/feature_overview.rst | 19 +--- doc/source/rllib/rllib-advanced-api.rst | 5 - doc/source/rllib/rllib-concepts.rst | 99 +------------------ doc/source/rllib/rllib-dev.rst | 4 +- doc/source/rllib/rllib-examples.rst | 33 +------ doc/source/rllib/rllib-models.rst | 10 +- doc/source/rllib/rllib-torch2x.rst | 17 ++-- 15 files changed, 54 insertions(+), 257 deletions(-) delete mode 100644 .buildkite/pipeline.test.yml diff --git a/.buildkite/build.rayci.yml b/.buildkite/build.rayci.yml index 8f66381fd284..85927c4b7535 100644 --- a/.buildkite/build.rayci.yml +++ b/.buildkite/build.rayci.yml @@ -90,7 +90,7 @@ steps: instance_type: medium commands: - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu11.5.2 --platform cu11.6.2 --platform cu11.7.1 + --platform cu11.5.2 --platform cu11.6.2 --platform cu11.7.1 --platform cu11.8.0 --platform cu12.1.1 --platform cpu --image-type ray --upload @@ -114,7 +114,7 @@ steps: instance_type: medium-arm64 commands: - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu11.5.2 --platform cu11.6.2 --platform cu11.7.1 + --platform cu11.5.2 --platform cu11.6.2 --platform cu11.7.1 --platform cu11.8.0 --platform cu12.1.1 --platform cpu --image-type ray --architecture aarch64 @@ -160,3 +160,9 @@ steps: soft_fail: true depends_on: - forge + + - label: ":book: LinkCheck" + instance_type: medium + command: make -C doc/ linkcheck + depends_on: docbuild + job_env: docbuild diff --git a/.buildkite/pipeline.test.yml b/.buildkite/pipeline.test.yml deleted file mode 100644 index 90e55af86303..000000000000 --- a/.buildkite/pipeline.test.yml +++ /dev/null @@ -1,7 +0,0 @@ -- label: ":book: LinkCheck" - instance_size: small - commands: - - export LINT=1 - - ./ci/env/install-dependencies.sh - - ./ci/ci.sh check_sphinx_links - soft_fail: True diff --git a/doc/Makefile b/doc/Makefile index a9351ad3cf04..5207a793bc99 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,7 +2,10 @@ # # You can set these variables from the command line. +# Allow linkcheck to run without treating warnings as errors; -W will +# fast-fail if enabled; it's better to gather the whole list of bad links at once. SPHINXOPTS = -a -E -W -j auto +LINKCHECKOPTS = -a -E -j auto SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build @@ -13,9 +16,10 @@ $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx in endif # Internal variables. 
-PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +ALLLINKCHECKOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(LINKCHECKOPTS) source # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source @@ -178,7 +182,7 @@ changes: @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + $(SPHINXBUILD) -b linkcheck $(ALLLINKCHECKOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." diff --git a/doc/source/cluster/kubernetes/user-guides/config.md b/doc/source/cluster/kubernetes/user-guides/config.md index 60c78ba854a8..41628a8c854e 100644 --- a/doc/source/cluster/kubernetes/user-guides/config.md +++ b/doc/source/cluster/kubernetes/user-guides/config.md @@ -177,7 +177,7 @@ To achieve this, it is simplest to use the same Ray image for the Ray head and a In any case, do make sure that all Ray images in your CR carry the same Ray version and Python version. To distribute custom code dependencies across your cluster, you can build a custom container image, -using one of the [official Ray images](https://hub.docker.com/r/rayproject/ray>) as the base. +using one of the [official Ray images](https://hub.docker.com/r/rayproject/ray) as the base. See {ref}`this guide ` to learn more about the official Ray images. For dynamic dependency management geared towards iteration and developement, you can also use {ref}`Runtime Environments `. diff --git a/doc/source/conf.py b/doc/source/conf.py index 13ce2eb90d87..769d6bf429f6 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -249,10 +249,14 @@ def __init__(self, version: str): "https://dev.mysql.com/doc/connector-python/en/", # Returning 522s intermittently. "https://lczero.org/", - # Returns 429 errors in Linkcheck due to too many requests - "https://archive.is/2022.12.16-171259/https://www.businessinsider.com/openai-chatgpt-trained-on-anyscale-ray-generative-lifelike-ai-models-2022-12", # Returns 406 but remains accessible "https://www.uber.com/blog/elastic-xgboost-ray/", + # Aggressive anti-bot checks + "https://archive.vn/*", + "https://archive.is/*", + # 429: Rate limited + "https://medium.com/*", + "https://towardsdatascience.com/*", ] # -- Options for HTML output ---------------------------------------------- diff --git a/doc/source/ray-contribute/docs.ipynb b/doc/source/ray-contribute/docs.ipynb index 858c2b63f322..665c6006c4ee 100644 --- a/doc/source/ray-contribute/docs.ipynb +++ b/doc/source/ray-contribute/docs.ipynb @@ -144,8 +144,7 @@ " [API documentation](https://github.com/ray-project/ray/blob/master/doc/source/tune/api/api.rst).\n", "- Notebooks, written in `.ipynb` format. All Tune examples are written as notebooks. These notebooks render in\n", " the browser like `.md` or `.rst` files, but have the added benefit of adding launch buttons to the top of the\n", - " document, so that users can run the code themselves in either Binder or Google Colab. 
A good first example to look\n", - " at is [this Tune example](https://github.com/ray-project/ray/blob/master/doc/source/tune/examples/tune-serve-integration-mnist.ipynb).\n", + " document, so that users can run the code themselves in either Binder or Google Colab.\n", "\n", "## Fixing typos and improving explanations\n", "\n", @@ -232,9 +231,10 @@ "Sometimes you might want to add a completely new document to the Ray documentation, like adding a new\n", "user guide or a new example.\n", "\n", - "For this to work, you need to make sure to add the new document explicitly to the \n", - "[`_toc.yml` file](https://github.com/ray-project/ray/blob/master/doc/source/_toc.yml) that determines\n", - "the structure of the Ray documentation.\n", + "For this to work, you need to make sure to add the new document explicitly to a parent document's toctree \n", + "which will determine the structure of the Ray documentation. See \n", + "[the sphinx documentation](https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-toctree) \n", + "for more information.\n", "\n", "Depending on the type of document you're adding, you might also have to make changes to an existing overview\n", "page that curates the list of documents in question.\n", diff --git a/doc/source/ray-overview/examples.rst b/doc/source/ray-overview/examples.rst index abd310da4730..3f7ec204d89f 100644 --- a/doc/source/ray-overview/examples.rst +++ b/doc/source/ray-overview/examples.rst @@ -604,12 +604,6 @@ Ray Examples Example of how to setup an RLlib algorithm against a locally running Unity3D editor - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item rl - :link: https://github.com/ray-project/ray/blob/master/rllib/env/tests/test_env_with_subprocess.py - - Example of how to ensure subprocesses spawned by envs are killed when RLlib exits. - .. grid-item-card:: :bdg-secondary:`Code example` :class-item: gallery-item reinforcement-learning rllib :link: https://github.com/ray-project/ray/blob/master/rllib/examples/custom_model_loss_and_metrics.py @@ -618,22 +612,10 @@ Ray Examples .. grid-item-card:: :bdg-secondary:`Code example` :class-item: gallery-item reinforcement-learning rllib - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/batch_norm_model.py + :link: https://github.com/ray-project/ray/blob/master/rllib/examples/models/batch_norm_model.py Example of adding batch norm layers to a custom model. - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item reinforcement-learning rllib - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/eager_execution.py - - Example of how to leverage TensorFlow eager to simplify debugging and design of custom models and policies. - - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item reinforcement-learning rllib - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/custom_fast_model.py - - Example of a "fast" Model learning only one parameter for tf and torch. - .. grid-item-card:: :bdg-secondary:`Code example` :class-item: gallery-item reinforcement-learning rllib :link: https://github.com/ray-project/ray/blob/master/rllib/examples/custom_model_api.py @@ -642,13 +624,13 @@ Ray Examples .. 
grid-item-card:: :bdg-secondary:`Code example` :class-item: gallery-item reinforcement-learning rllib - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/trajectory_view_api.py + :link: https://github.com/ray-project/ray/blob/master/rllib/examples/models/trajectory_view_utilizing_models.py An example on how a model can use the trajectory view API to specify its own input. .. grid-item-card:: :bdg-secondary:`Code example` :class-item: gallery-item reinforcement-learning rllib - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/mobilenet_v2_with_lstm.py + :link: https://github.com/ray-project/ray/blob/master/rllib/examples/models/mobilenet_v2_with_lstm_models.py Implementations of `MobileNetV2` and `torch.hub (mobilenet_v2)`-wrapping example models. @@ -682,24 +664,6 @@ Ray Examples Example of how to output custom training metrics to TensorBoard. - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item reinforcement-learning rllib tensorflow - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/custom_tf_policy.py - - How to setup a custom TFPolicy. - - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item reinforcement-learning rllib pytorch - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/custom_torch_policy.py - - How to setup a custom TorchPolicy. - - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item reinforcement-learning rllib - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/rollout_worker_custom_workflow.py - - Example of how to use RLlib's lower-level building blocks to implement a fully customized training workflow. - .. grid-item-card:: :bdg-secondary:`Code example` :class-item: gallery-item reinforcement-learning rllib :link: https://github.com/ray-project/ray/blob/master/rllib/examples/two_trainer_workflow.py @@ -833,18 +797,6 @@ Ray Examples Example of hierarchical training using the multi-agent API. - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item reinforcement-learning rllib - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/iterated_prisoners_dilemma_env.py - - Example of an iterated prisoner's dilemma environment solved by RLlib. - - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item reinforcement-learning rllib - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/partial_gpus.py - - Example of how to setup fractional GPUs for learning (driver) and environment rollouts (remote workers). - .. grid-item-card:: :bdg-secondary:`Code example` :class-item: gallery-item reinforcement-learning rllib :link: https://github.com/ray-project/ray/blob/master/rllib/examples/nested_action_spaces.py @@ -857,12 +809,6 @@ Ray Examples Example of how to handle variable-length or parametric action spaces - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item reinforcement-learning rllib - :link: https://github.com/ray-project/ray/blob/master/rllib/examples/custom_observation_filters.py - - How to filter raw observations coming from the environment for further processing by the Agent's model(s). - .. 
grid-item-card:: :bdg-secondary:`Code example` :class-item: gallery-item reinforcement-learning rllib :link: https://github.com/ray-project/ray/blob/master/rllib/examples/complex_struct_space.py diff --git a/doc/source/ray-overview/ray-libraries.rst b/doc/source/ray-overview/ray-libraries.rst index c14501d8265c..7e5f54293ac5 100644 --- a/doc/source/ray-overview/ray-libraries.rst +++ b/doc/source/ray-overview/ray-libraries.rst @@ -11,19 +11,8 @@ more information. .. dropdown:: Adding Your Integration - To add an integration, simply add an entry to the `projects` list of our - Gallery YAML on `GitHub `_. - - .. code-block:: yaml - - - name: the integration link button text - section_title: The section title for this integration - description: A quick description of your library and its integration with Ray - website: The URL of your website - repo: The URL of your project on GitHub - image: The URL of a logo of your project - - That's all! + To add an integration simply add an entry to this file, using the same + ``grid-item-card`` directive that the other examples use. That's all! .. grid:: 1 2 2 3 :gutter: 1 diff --git a/doc/source/rllib/feature_overview.rst b/doc/source/rllib/feature_overview.rst index 62bb20f26898..311f93767606 100644 --- a/doc/source/rllib/feature_overview.rst +++ b/doc/source/rllib/feature_overview.rst @@ -1,23 +1,6 @@ .. List of most important features of RLlib, with sigil-like buttons for each of the features. To be included into different rst files. - -.. container:: clear-both - - .. container:: buttons-float-left - - .. https://docs.google.com/drawings/d/1i_yoxocyEOgiCxcfRZVKpNh0R_-2tQZOX4syquiytAI/edit?skip_itp2_check=true&pli=1 - - .. image:: images/sigils/rllib-sigil-tf-and-torch.svg - :width: 100 - :target: https://github.com/ray-project/ray/blob/master/rllib/examples/custom_tf_policy.py - - .. container:: - - The most **popular deep-learning frameworks**: `PyTorch `_ and `TensorFlow - (tf1.x/2.x static-graph/eager/traced) `_. - - .. container:: clear-both .. container:: buttons-float-left @@ -26,7 +9,7 @@ .. image:: images/sigils/rllib-sigil-distributed-learning.svg :width: 100 - :target: https://github.com/ray-project/ray/blob/master/rllib/examples/tune/framework.py + :target: https://github.com/ray-project/ray/blob/master/rllib/utils/framework.py .. container:: diff --git a/doc/source/rllib/rllib-advanced-api.rst b/doc/source/rllib/rllib-advanced-api.rst index cd97ebac4bb0..97d3d136fb4e 100644 --- a/doc/source/rllib/rllib-advanced-api.rst +++ b/doc/source/rllib/rllib-advanced-api.rst @@ -14,11 +14,6 @@ Sometimes, it is desirable to have full control over training, but still run ins Tune supports :ref:`custom trainable functions ` that can be used to implement `custom training workflows (example) `__. -For even finer-grained control over training, you can use RLlib's lower-level -`building blocks `__ directly to implement -`fully customized training workflows `__. 
- - Curriculum Learning ~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/rllib/rllib-concepts.rst b/doc/source/rllib/rllib-concepts.rst index bc979c31f2b7..db4fd7443595 100644 --- a/doc/source/rllib/rllib-concepts.rst +++ b/doc/source/rllib/rllib-concepts.rst @@ -164,7 +164,7 @@ We can create an `Algorithm <#algorithms>`__ and try running this policy on a to tune.Tuner(MyAlgo, param_space={"env": "CartPole-v1", "num_workers": 2}).fit() -If you run the above snippet `(runnable file here) `__, you'll probably notice that CartPole doesn't learn so well: +If you run the above snippet, you'll probably notice that CartPole doesn't learn so well: .. code-block:: bash @@ -378,103 +378,6 @@ However, eager can be slower than graph mode unless tracing is enabled. You can also selectively leverage eager operations within graph mode execution with `tf.py_function `__. -Here's an example of using eager ops embedded -`within a loss function `__. - -Building Policies in PyTorch -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Defining a policy in PyTorch is quite similar to that for TensorFlow (and the process of defining a algorithm given a Torch policy is exactly the same). -Here's a simple example of a trivial torch policy `(runnable file here) `__: - -.. code-block:: python - - from ray.rllib.policy.sample_batch import SampleBatch - from ray.rllib.policy.torch_policy_template import build_torch_policy - - def policy_gradient_loss(policy, model, dist_class, train_batch): - logits, _ = model.from_batch(train_batch) - action_dist = dist_class(logits) - log_probs = action_dist.logp(train_batch[SampleBatch.ACTIONS]) - return -train_batch[SampleBatch.REWARDS].dot(log_probs) - - # - MyTorchPolicy = build_torch_policy( - name="MyTorchPolicy", - loss_fn=policy_gradient_loss) - -Now, building on the TF examples above, let's look at how the `A3C torch policy `__ is defined: - -.. code-block:: python - - A3CTorchPolicy = build_torch_policy( - name="A3CTorchPolicy", - get_default_config=lambda: ray.rllib.algorithms.a3c.a3c.DEFAULT_CONFIG, - loss_fn=actor_critic_loss, - stats_fn=loss_and_entropy_stats, - postprocess_fn=add_advantages, - extra_action_out_fn=model_value_predictions, - extra_grad_process_fn=apply_grad_clipping, - optimizer_fn=torch_optimizer, - mixins=[ValueNetworkMixin]) - -``loss_fn``: Similar to the TF example, the actor critic loss is defined over ``batch``. We imperatively execute the forward pass by calling ``model()`` on the observations followed by ``dist_class()`` on the output logits. The output Tensors are saved as attributes of the policy object (e.g., ``policy.entropy = dist.entropy.mean()``), and we return the scalar loss: - -.. code-block:: python - - def actor_critic_loss(policy, model, dist_class, train_batch): - logits, _ = model.from_batch(train_batch) - values = model.value_function() - action_dist = dist_class(logits) - log_probs = action_dist.logp(train_batch[SampleBatch.ACTIONS]) - policy.entropy = action_dist.entropy().mean() - ... - return overall_err - -``stats_fn``: The stats function references ``entropy``, ``pi_err``, and ``value_err`` saved from the call to the loss function, similar in the PPO TF example: - -.. code-block:: python - - def loss_and_entropy_stats(policy, train_batch): - return { - "policy_entropy": policy.entropy.item(), - "policy_loss": policy.pi_err.item(), - "vf_loss": policy.value_err.item(), - } - -``extra_action_out_fn``: We save value function predictions given model outputs. 
This makes the value function predictions of the model available in the trajectory as ``batch[SampleBatch.VF_PREDS]``: - -.. code-block:: python - - def model_value_predictions(policy, input_dict, state_batches, model): - return {SampleBatch.VF_PREDS: model.value_function().cpu().numpy()} - -``postprocess_fn`` and ``mixins``: Similar to the PPO example, we need access to the value function during postprocessing (i.e., ``add_advantages`` below calls ``policy._value()``. The value function is exposed through a mixin class that defines the method: - -.. code-block:: python - - def add_advantages(policy, - sample_batch, - other_agent_batches=None, - episode=None): - completed = sample_batch[SampleBatch.DONES][-1] - if completed: - last_r = 0.0 - else: - last_r = policy._value(sample_batch[SampleBatch.NEXT_OBS][-1]) - return compute_advantages(sample_batch, last_r, policy.config["gamma"], - policy.config["lambda"]) - - class ValueNetworkMixin(object): - def _value(self, obs): - with self.lock: - obs = torch.from_numpy(obs).float().unsqueeze(0).to(self.device) - _, _, vf, _ = self.model({"obs": obs}, []) - return vf.detach().cpu().numpy().squeeze() - -You can find the full policy definition in `a3c_torch_policy.py `__. - -In summary, the main differences between the PyTorch and TensorFlow policy builder functions is that the TF loss and stats functions are built symbolically when the policy is initialized, whereas for PyTorch (or TensorFlow Eager) these functions are called imperatively each time they are used. Extending Existing Policies ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/rllib/rllib-dev.rst b/doc/source/rllib/rllib-dev.rst index 83db368af7c0..f65728b09732 100644 --- a/doc/source/rllib/rllib-dev.rst +++ b/doc/source/rllib/rllib-dev.rst @@ -111,7 +111,7 @@ Keeping the memory usage of long running workers stable can be challenging. The .. autoclass:: ray.rllib.algorithms.callbacks.MemoryTrackingCallbacks -The objects with the top 20 memory usage in the workers will be added as custom metrics. These can then be monitored using tensorboard or other metrics integrations like Weights and Biases: +The objects with the top 20 memory usage in the workers will be added as custom metrics. These can then be monitored using tensorboard or other metrics integrations like Weights and Biases: .. image:: images/MemoryTrackingCallbacks.png @@ -150,5 +150,3 @@ It returns three values: * ``tf1``: The ``tf.compat.v1`` module or the installed tf1.x package (if the version is < 2.0). * ``tf``: The installed tensorflow module as-is. * ``tfv``: A version integer, whose value is either 1 or 2. - -`See here `__ for a detailed example script. diff --git a/doc/source/rllib/rllib-examples.rst b/doc/source/rllib/rllib-examples.rst index 4dea2e474283..5af2ac7a5140 100644 --- a/doc/source/rllib/rllib-examples.rst +++ b/doc/source/rllib/rllib-examples.rst @@ -43,9 +43,6 @@ Environments and Adapters Use this example to try things out and watch the game and the learning progress live in the editor. Providing a compiled game, this example could also run in distributed fashion with `num_workers > 0`. For a more heavy-weight, distributed, cloud-based example, see ``Unity3D client/server`` below. -- `Subprocess environment `__: - Example of how to ensure subprocesses spawned by envs are killed when RLlib exits. - Custom- and Complex Models -------------------------- @@ -54,17 +51,13 @@ Custom- and Complex Models Example of using a custom Keras model. 
- `Registering a custom model with supervised loss `__: Example of defining and registering a custom model with a supervised loss. -- `Batch normalization `__: +- `Batch normalization `__: Example of adding batch norm layers to a custom model. -- `Eager execution `__: - Example of how to leverage TensorFlow eager to simplify debugging and design of custom models and policies. -- `Custom "Fast" Model `__: - Example of a "fast" Model learning only one parameter for tf and torch. - `Custom model API example `__: Shows how to define a custom Model API in RLlib, such that it can be used inside certain algorithms. -- `Trajectory View API utilizing model `__: +- `Trajectory View API utilizing model `__: An example on how a model can use the trajectory view API to specify its own input. -- `MobileNetV2 wrapping example model `__: +- `MobileNetV2 wrapping example model `__: Implementations of `tf.keras.applications.mobilenet_v2.MobileNetV2` and `torch.hub (mobilenet_v2)`-wrapping example models. - `Differentiable Neural Computer `__: Example of DeepMind's Differentiable Neural Computer for partially-observable environments. @@ -82,12 +75,6 @@ Training Workflows How to setup a custom Logger object in RLlib. - `Custom metrics `__: Example of how to output custom training metrics to TensorBoard. -- `Custom Policy class (TensorFlow) `__: - How to setup a custom TFPolicy. -- `Custom Policy class (PyTorch) `__: - How to setup a custom TorchPolicy. -- `Using rollout workers directly for control over the whole training workflow `__: - Example of how to use RLlib's lower-level building blocks to implement a fully customized training workflow. - `Custom execution plan function handling two different Policies (DQN and PPO) at the same time `__: Example of how to use the exec. plan of an Algorithm to trin two different policies in parallel (also using multi-agent API). - `Custom tune experiment `__: @@ -150,14 +137,6 @@ Multi-Agent and Hierarchical Example of alternating training between DQN and PPO. - `Hierarchical training `__: Example of hierarchical training using the multi-agent API. -- `Iterated Prisoner's Dilemma environment example `__: - Example of an iterated prisoner's dilemma environment solved by RLlib. - - -GPU examples ------------- -- `Example showing how to setup fractional GPUs `__: - Example of how to setup fractional GPUs for learning (driver) and environment rollouts (remote workers). Special Action- and Observation Spaces @@ -166,9 +145,7 @@ Special Action- and Observation Spaces - `Nested action spaces `__: Learning in arbitrarily nested action spaces. - `Parametric actions `__: - Example of how to handle variable-length or parametric action spaces (see also `this example here `__). -- `Custom observation filters `__: - How to filter raw observations coming from the environment for further processing by the Agent's model(s). + Example of how to handle variable-length or parametric action spaces. - `Using the "Repeated" space of RLlib for variable lengths observations `__: How to use RLlib's `Repeated` space to handle variable length observations. - `Autoregressive action distribution example `__: @@ -181,7 +158,7 @@ Community Examples A General Evaluation Platform and Building Toolkit for Single/Multi-Agent Intelligence with RLlib-generated baselines. - `CARLA `__: - Example of training autonomous vehicles with RLlib and `CARLA `__ simulator. + Example of training autonomous vehicles with RLlib and `CARLA `__ simulator. 
- `The Emergence of Adversarial Communication in Multi-Agent Reinforcement Learning `__: Using Graph Neural Networks and RLlib to train multiple cooperative and adversarial agents to solve the "cover the area"-problem, thereby learning how to best communicate (or - in the adversarial case - how to disturb communication) (`code `__). diff --git a/doc/source/rllib/rllib-models.rst b/doc/source/rllib/rllib-models.rst index fca472078d5f..40da5ee45ac6 100644 --- a/doc/source/rllib/rllib-models.rst +++ b/doc/source/rllib/rllib-models.rst @@ -32,7 +32,7 @@ observation space. Thereby, the following simple rules apply: - Discrete observations are one-hot encoded, e.g. ``Discrete(3) and value=1 -> [0, 1, 0]``. - MultiDiscrete observations are encoded by one-hot encoding each discrete element - and then concatenating the respective one-hot encoded vectors. + and then concatenating the respective one-hot encoded vectors. e.g. ``MultiDiscrete([3, 4]) and value=[1, 3] -> [0 1 0 0 0 0 1]`` because the first ``1`` is encoded as ``[0 1 0]`` and the second ``3`` is encoded as ``[0 0 0 1]``; these two vectors are then concatenated to ``[0 1 0 0 0 0 1]``. @@ -343,7 +343,7 @@ Batch Normalization ``````````````````` You can use ``tf.layers.batch_normalization(x, training=input_dict["is_training"])`` to add batch norm layers to your custom model -(see a `code example here `__). +(see a `code example here `__). RLlib will automatically run the update ops for the batch norm layers during optimization (see `tf_policy.py `__ and `multi_gpu_learner_thread.py `__ for the exact handling of these updates). @@ -587,15 +587,15 @@ Custom models can be used to work with environments where (1) the set of valid a return action_logits + inf_mask, state -Depending on your use case it may make sense to use |just the masking|_, |just action embeddings|_, or |both|_. For a runnable example of "just action embeddings" in code, -check out `examples/parametric_actions_cartpole.py `__. +Depending on your use case it may make sense to use |just the masking|_, |just action embeddings|_, or |both|_. For a runnable example of "just action embeddings" in code, +check out `examples/parametric_actions_cartpole.py `__. .. |just the masking| replace:: just the **masking** .. _just the masking: https://github.com/ray-project/ray/blob/master/rllib/examples/models/action_mask_model.py .. |just action embeddings| replace:: just action **embeddings** .. _just action embeddings: https://github.com/ray-project/ray/blob/master/rllib/examples/parametric_actions_cartpole.py .. |both| replace:: **both** -.. _both: https://github.com/ray-project/ray/blob/master/rllib/examples/models/parametric_actions_model.py +.. _both: https://github.com/ray-project/ray/blob/master/rllib/examples/models/parametric_actions_model.py Note that since masking introduces ``tf.float32.min`` values into the model output, this technique might not work with all algorithm options. For example, algorithms might crash if they incorrectly process the ``tf.float32.min`` values. The cartpole example has working configurations for DQN (must set ``hiddens=[]``), PPO (must disable running mean and set ``model.vf_share_layers=True``), and several other algorithms. Not all algorithms support parametric actions; see the `algorithm overview `__. 
diff --git a/doc/source/rllib/rllib-torch2x.rst b/doc/source/rllib/rllib-torch2x.rst index 5d1ccb103f04..3981db319108 100644 --- a/doc/source/rllib/rllib-torch2x.rst +++ b/doc/source/rllib/rllib-torch2x.rst @@ -5,7 +5,7 @@ Using RLlib with torch 2.x compile ================================== -torch 2.x comes with the ``torch.compile()`` `API `_, which leverages `torch dynamo `_ under the hood to JIT-compile wrapped code. We integrate ``torch.compile()`` with RLlib in the context of `RLModules `_ and Learners. +torch 2.x comes with the ``torch.compile()`` `API `_, which can be used to JIT-compile wrapped code. We integrate ``torch.compile()`` with RLlib in the context of `RLModules `_ and Learners. We have integrated this feature with RLModules. You can set the backend and mode via ``framework()`` API on an :py:class:`~ray.rllib.algorithms.algorithm_config.AlgorithmConfig` object. Alternatively, you can compile the :py:class:`~ray.rllib.core.rl_module.rl_module.RLModule` directly during stand-alone usage, such as inference. @@ -18,7 +18,7 @@ Inference --------- For the benchmarking metric, we compute the inverse of the time it takes to run :py:meth:`~ray.rllib.core.rl_module.rl_module.RLModule.forward_exploration` of the RLModule. We have conducted this benchmark on the default implementation of PPO RLModule under different hardware settings, torch versions, dynamo backends and modes, as well as different batch sizes. The following table shows the combinations of torch-backend and -mode that yield the highest speedup we could find for a given combination of hardware and PyTorch version: -.. list-table:: +.. list-table:: :widths: 25 25 25 25 :header-rows: 1 @@ -64,7 +64,7 @@ For detailed tables, see `Appendix <../../../../rllib/benchmarks/torch_compile/R .. code-block:: bash - + python ./run_inference_bm.py --backend --mode -bs Some meta-level comments @@ -94,18 +94,18 @@ In RLlib, you can now set the configuration so that it uses the compiled module ) -`This <../../../../rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py>`_ benchmark script runs the PPO algorithm with the default model architecture for the Atari-Breakout game. It runs the training for ``n`` iterations for both compiled and non-compiled RLModules and reports the speedup. Note that negative speedup values mean a slowdown when you compile the module. +`This <../../../../rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py>`_ benchmark script runs the PPO algorithm with the default model architecture for the Atari-Breakout game. It runs the training for ``n`` iterations for both compiled and non-compiled RLModules and reports the speedup. Note that negative speedup values mean a slowdown when you compile the module. To run the benchmark script, you need a Ray cluster comprised of at least 129 CPUs (2x64 + 1) and 2 GPUs. If this configuration is not accessible to you, you can change the number of sampling workers and batch size to make the requirements smaller. .. code-block:: bash - + python ./run_ppo_with_inference_bm.py --backend --mode - + Here is a summary of results: -.. list-table:: +.. list-table:: :widths: 33 33 33 :header-rows: 1 @@ -128,5 +128,4 @@ Here is a summary of results: - max-autotune - 12.88 -As you can see, ``onnxrt`` does not gain any speedups in the setup we tested (in fact it slows the workload down by 70%), while the ``ipex`` provides ~10% speedup. If we change the model architecture, these numbers may change. 
So it is very important to fix the architecture first and then search for the fastest training settings. - +As you can see, ``onnxrt`` does not gain any speedups in the setup we tested (in fact it slows the workload down by 70%), while the ``ipex`` provides ~10% speedup. If we change the model architecture, these numbers may change. So it is very important to fix the architecture first and then search for the fastest training settings. From 289016d9de8314d32fb8a1b99248f476635b7236 Mon Sep 17 00:00:00 2001 From: Peyton Murray Date: Fri, 8 Dec 2023 15:43:16 -0800 Subject: [PATCH 2/7] Update doc/source/ray-contribute/docs.ipynb Co-authored-by: angelinalg <122562471+angelinalg@users.noreply.github.com> Signed-off-by: Peyton Murray --- doc/source/ray-contribute/docs.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/ray-contribute/docs.ipynb b/doc/source/ray-contribute/docs.ipynb index 665c6006c4ee..c830ae49a84c 100644 --- a/doc/source/ray-contribute/docs.ipynb +++ b/doc/source/ray-contribute/docs.ipynb @@ -231,7 +231,7 @@ "Sometimes you might want to add a completely new document to the Ray documentation, like adding a new\n", "user guide or a new example.\n", "\n", - "For this to work, you need to make sure to add the new document explicitly to a parent document's toctree \n", + "For this to work, you need to make sure to add the new document explicitly to a parent document's toctree, \n", "which will determine the structure of the Ray documentation. See \n", "[the sphinx documentation](https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-toctree) \n", "for more information.\n", From 11b04d60ecc1d18bdb01a573ba7beb91853371b5 Mon Sep 17 00:00:00 2001 From: Peyton Murray Date: Fri, 8 Dec 2023 15:43:26 -0800 Subject: [PATCH 3/7] Update doc/source/ray-contribute/docs.ipynb Co-authored-by: angelinalg <122562471+angelinalg@users.noreply.github.com> Signed-off-by: Peyton Murray --- doc/source/ray-contribute/docs.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/ray-contribute/docs.ipynb b/doc/source/ray-contribute/docs.ipynb index c830ae49a84c..35f3adf9d67a 100644 --- a/doc/source/ray-contribute/docs.ipynb +++ b/doc/source/ray-contribute/docs.ipynb @@ -232,7 +232,7 @@ "user guide or a new example.\n", "\n", "For this to work, you need to make sure to add the new document explicitly to a parent document's toctree, \n", - "which will determine the structure of the Ray documentation. See \n", + "which determines the structure of the Ray documentation. See \n", "[the sphinx documentation](https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-toctree) \n", "for more information.\n", "\n", From 9f2be6d22d2ea9d67d6ff9ff62a5858e09d43387 Mon Sep 17 00:00:00 2001 From: Peyton Murray Date: Fri, 8 Dec 2023 15:43:35 -0800 Subject: [PATCH 4/7] Update doc/source/ray-overview/ray-libraries.rst Co-authored-by: angelinalg <122562471+angelinalg@users.noreply.github.com> Signed-off-by: Peyton Murray --- doc/source/ray-overview/ray-libraries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/ray-overview/ray-libraries.rst b/doc/source/ray-overview/ray-libraries.rst index 7e5f54293ac5..758b21eeb30e 100644 --- a/doc/source/ray-overview/ray-libraries.rst +++ b/doc/source/ray-overview/ray-libraries.rst @@ -11,7 +11,7 @@ more information. .. 
dropdown:: Adding Your Integration
 
-    To add an integration simply add an entry to this file, using the same
+    To add an integration, add an entry to this file, using the same
     ``grid-item-card`` directive that the other examples use. That's all!
 
 .. grid:: 1 2 2 3

From 0ebe4040ed43ef4d818ebdaa7370a1fe2695e5b8 Mon Sep 17 00:00:00 2001
From: Peyton Murray
Date: Fri, 8 Dec 2023 15:43:41 -0800
Subject: [PATCH 5/7] Update doc/source/ray-overview/ray-libraries.rst

Co-authored-by: angelinalg <122562471+angelinalg@users.noreply.github.com>
Signed-off-by: Peyton Murray
---
 doc/source/ray-overview/ray-libraries.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/ray-overview/ray-libraries.rst b/doc/source/ray-overview/ray-libraries.rst
index 758b21eeb30e..592e36032bae 100644
--- a/doc/source/ray-overview/ray-libraries.rst
+++ b/doc/source/ray-overview/ray-libraries.rst
@@ -12,7 +12,7 @@ more information.
 .. dropdown:: Adding Your Integration
 
     To add an integration, add an entry to this file, using the same
-    ``grid-item-card`` directive that the other examples use. That's all!
+    ``grid-item-card`` directive that the other examples use.
 
 .. grid:: 1 2 2 3
     :gutter: 1

From 161aa0f611d0ad96f09fe481aa72b2b2e3ff5b93 Mon Sep 17 00:00:00 2001
From: Peyton Murray
Date: Fri, 8 Dec 2023 15:43:57 -0800
Subject: [PATCH 6/7] Update doc/source/rllib/rllib-concepts.rst

Co-authored-by: angelinalg <122562471+angelinalg@users.noreply.github.com>
Signed-off-by: Peyton Murray
---
 doc/source/rllib/rllib-concepts.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/rllib/rllib-concepts.rst b/doc/source/rllib/rllib-concepts.rst
index db4fd7443595..a31e589ad1d9 100644
--- a/doc/source/rllib/rllib-concepts.rst
+++ b/doc/source/rllib/rllib-concepts.rst
@@ -164,7 +164,7 @@ We can create an `Algorithm <#algorithms>`__ and try running this policy on a to
     tune.Tuner(MyAlgo, param_space={"env": "CartPole-v1", "num_workers": 2}).fit()
 
-If you run the above snippet, you'll probably notice that CartPole doesn't learn so well:
+If you run the above snippet, notice that CartPole doesn't learn so well:
 
 ..
code-block:: bash From d2c3c475116ff5c97ab2d449ef59463ee913fae9 Mon Sep 17 00:00:00 2001 From: Peyton Murray Date: Fri, 8 Dec 2023 15:44:18 -0800 Subject: [PATCH 7/7] Update doc/source/rllib/rllib-dev.rst Co-authored-by: angelinalg <122562471+angelinalg@users.noreply.github.com> Signed-off-by: Peyton Murray --- .buildkite/build.rayci.yml | 6 ------ .buildkite/lint.rayci.yml | 34 ++++++++++++++++++++++++++++++++-- doc/source/rllib/rllib-dev.rst | 2 +- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/.buildkite/build.rayci.yml b/.buildkite/build.rayci.yml index 85927c4b7535..32954c046a62 100644 --- a/.buildkite/build.rayci.yml +++ b/.buildkite/build.rayci.yml @@ -160,9 +160,3 @@ steps: soft_fail: true depends_on: - forge - - - label: ":book: LinkCheck" - instance_type: medium - command: make -C doc/ linkcheck - depends_on: docbuild - job_env: docbuild diff --git a/.buildkite/lint.rayci.yml b/.buildkite/lint.rayci.yml index 71baeb934289..c4fcbfdb07f5 100644 --- a/.buildkite/lint.rayci.yml +++ b/.buildkite/lint.rayci.yml @@ -1,63 +1,93 @@ group: lint -depends_on: - - forge steps: - label: ":lint-roller: lint: clang format" + depends_on: + - forge commands: - pip install -c python/requirements_compiled.txt clang-format - ./ci/lint/check-git-clang-format-output.sh - label: ":lint-roller: lint: code format" + depends_on: + - forge commands: - pip install -c python/requirements_compiled.txt -r python/requirements/lint-requirements.txt - FORMAT_SH_PRINT_DIFF=1 ./ci/lint/format.sh --all-scripts - label: ":lint-roller: lint: untested code snippet" + depends_on: + - forge commands: - pip install -c python/requirements_compiled.txt semgrep - semgrep ci --config semgrep.yml - label: ":lint-roller: lint: banned words" + depends_on: + - forge commands: - ./ci/lint/check-banned-words.sh - label: ":lint-roller: lint: doc readme" + depends_on: + - forge commands: - pip install -c python/requirements_compiled.txt docutils - cd python && python setup.py check --restructuredtext --strict --metadata - label: ":lint-roller: lint: dashboard format" + depends_on: + - forge commands: - ./ci/lint/check-dashboard-format.sh - label: ":lint-roller: lint: copyright format" + depends_on: + - forge commands: - ./ci/lint/copyright-format.sh -c - label: ":lint-roller: lint: bazel team" + depends_on: + - forge commands: - bazel query 'kind("cc_test", //...)' --output=xml | python ./ci/lint/check-bazel-team-owner.py - bazel query 'kind("py_test", //...)' --output=xml | python ./ci/lint/check-bazel-team-owner.py - label: ":lint-roller: lint: bazel buildifier" + depends_on: + - forge commands: - ./ci/lint/check-bazel-buildifier.sh - label: ":lint-roller: lint: pytest format" + depends_on: + - forge commands: - pip install -c python/requirements_compiled.txt yq - ./ci/lint/check-pytest-format.sh - label: ":lint-roller: lint: test coverage" + depends_on: + - forge commands: - python ci/pipeline/check-test-run.py - label: ":lint-roller: lint: api annotations" + depends_on: + - forge instance_type: medium commands: - RAY_DISABLE_EXTRA_CPP=1 pip install -e python/[all] - ./ci/lint/check_api_annotations.py - label: ":lint-roller: lint: documentation style" + depends_on: + - forge commands: - ./ci/lint/check-documentation-style.sh + + - label: ":lint-roller: lint: linkcheck" + instance_type: medium + command: make -C doc/ linkcheck + depends_on: docbuild + job_env: docbuild diff --git a/doc/source/rllib/rllib-dev.rst b/doc/source/rllib/rllib-dev.rst index f65728b09732..9b5fde383351 100644 --- 
a/doc/source/rllib/rllib-dev.rst
+++ b/doc/source/rllib/rllib-dev.rst
@@ -111,7 +111,7 @@ Keeping the memory usage of long running workers stable can be challenging. The
 
 .. autoclass:: ray.rllib.algorithms.callbacks.MemoryTrackingCallbacks
 
-The objects with the top 20 memory usage in the workers will be added as custom metrics. These can then be monitored using tensorboard or other metrics integrations like Weights and Biases:
+The 20 objects with the highest memory usage in the workers are added as custom metrics. These can then be monitored using TensorBoard or other metrics integrations like Weights & Biases:
 
 .. image:: images/MemoryTrackingCallbacks.png