[RLlib] Docs do-over (new API stack): Re-write checkpointing rst page. #49504

Merged · 45 commits · Jan 7, 2025

Changes from 36 commits

Commits
c90be66  wip (sven1977, Dec 23, 2024)
0b3870c  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Dec 27, 2024)
e2a156c  wip (sven1977, Dec 27, 2024)
e520678  wip (sven1977, Dec 27, 2024)
3c05f4a  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Dec 27, 2024)
c6a18b2  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Dec 28, 2024)
266f894  wip (sven1977, Dec 28, 2024)
5401b6d  wip (sven1977, Dec 29, 2024)
6edef91  wip (sven1977, Dec 30, 2024)
fd66605  merge (sven1977, Dec 30, 2024)
f939ee7  wip (sven1977, Dec 30, 2024)
0a3e79e  fix (sven1977, Dec 30, 2024)
dadd964  wip (sven1977, Dec 30, 2024)
896c1a2  wip (sven1977, Dec 30, 2024)
d76edfd  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Dec 30, 2024)
44acf78  wip (sven1977, Dec 30, 2024)
a8bdbac  wip (sven1977, Dec 30, 2024)
fdddd9f  wip (sven1977, Dec 30, 2024)
e8fec77  Merge branch 'checkpointing_enhancements_msgpack_and_separation_of_st… (sven1977, Dec 30, 2024)
a42ff1e  wip (sven1977, Dec 30, 2024)
53967bc  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Dec 31, 2024)
db90920  wip (sven1977, Dec 31, 2024)
f19b340  merge (sven1977, Jan 2, 2025)
46c1388  wip (sven1977, Jan 2, 2025)
5554e3b  wip (sven1977, Jan 2, 2025)
0598b45  wip (sven1977, Jan 3, 2025)
e210dbe  wip (sven1977, Jan 3, 2025)
972f355  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Jan 3, 2025)
78a344c  fix (sven1977, Jan 3, 2025)
5bf0ef6  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Jan 6, 2025)
2f04106  wip (sven1977, Jan 6, 2025)
2f9925f  Apply suggestions from code review (sven1977, Jan 6, 2025)
5333255  Merge branch 'docs_redo_checkpointing' of https://github.com/sven1977… (sven1977, Jan 6, 2025)
d6e186f  fixes (sven1977, Jan 6, 2025)
da727c3  wip (sven1977, Jan 6, 2025)
00f5a4b  fix (sven1977, Jan 6, 2025)
ecf631d  fix (sven1977, Jan 6, 2025)
7979dbf  fixes (sven1977, Jan 6, 2025)
62736f9  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Jan 7, 2025)
64d47bc  fixes (sven1977, Jan 7, 2025)
d9b769c  wip (sven1977, Jan 7, 2025)
f63afc3  wip (sven1977, Jan 7, 2025)
dcd4663  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Jan 7, 2025)
b59401b  Merge branch 'master' of https://github.com/ray-project/ray into docs… (sven1977, Jan 7, 2025)
998a544  wip (sven1977, Jan 7, 2025)
4 changes: 4 additions & 0 deletions .vale/styles/config/vocabularies/RLlib/accept.txt
@@ -5,6 +5,9 @@
[Aa]utoscal(e|ing)
boolean
[Cc]allables?
+[Cc]heckpoints?(ing)?
+[Cc]heckpointable
+classmethods?
coeff
config
(DQN|dqn)
@@ -27,6 +30,7 @@ RLModules?
rollout
(SAC|sac)
SGD
+[Ss]ubcomponents?
[Tt]ensor[Ff]low
timesteps?
vectorizes?
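As an aside (not part of the diff), Vale vocabulary entries like these are case-sensitive regular expressions matched against whole words. A minimal Python sketch of what the new patterns accept; Vale itself does the matching, this only illustrates the word forms the regexes cover:

import re

# The three word-family patterns this PR adds to accept.txt.
patterns = [r"[Cc]heckpoints?(ing)?", r"[Cc]heckpointable", r"[Ss]ubcomponents?"]

for word in ["Checkpointing", "checkpoints", "Checkpointable", "subcomponents", "CHECKPOINT"]:
    accepted = any(re.fullmatch(p, word) for p in patterns)
    print(f"{word}: {'accepted' if accepted else 'flagged'}")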
467 changes: 467 additions & 0 deletions doc/source/rllib/checkpoints.rst

Large diffs are not rendered by default.
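GitHub collapses the new 467-line checkpoints.rst. Going by the new API stack this PR documents, the page presumably centers on the Checkpointable workflow; the following is a rough, unofficial sketch (method availability assumed from the new API stack, not taken from the unrendered page):

from ray.rllib.algorithms.algorithm import Algorithm
from ray.rllib.algorithms.ppo import PPOConfig

config = PPOConfig().environment("CartPole-v1")
algo = config.build()
algo.train()

# Save the Algorithm and all its subcomponents to a directory ...
checkpoint_dir = algo.save_to_path("/tmp/my_algo_checkpoint")

# ... and later recreate an identical Algorithm from that directory.
restored_algo = Algorithm.from_checkpoint(checkpoint_dir)
restored_algo.train()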

@@ -1,65 +1,5 @@
# flake8: noqa

# __create-algo-checkpoint-begin__
# Create a PPO algorithm object using a config object ..
from ray.rllib.algorithms.ppo import PPOConfig

my_ppo_config = (
PPOConfig()
.api_stack(
enable_rl_module_and_learner=False,
enable_env_runner_and_connector_v2=False,
)
.environment("CartPole-v1")
)
my_ppo = my_ppo_config.build()

# .. train one iteration ..
my_ppo.train()
# .. and call `save()` to create a checkpoint.
save_result = my_ppo.save()
path_to_checkpoint = save_result.checkpoint.path
print(
"An Algorithm checkpoint has been created inside directory: "
f"'{path_to_checkpoint}'."
)

# Let's terminate the algo for demonstration purposes.
my_ppo.stop()
# Calling `train()` now would raise an error, because the algo is stopped:
# my_ppo.train()
# __create-algo-checkpoint-end__


# __restore-from-algo-checkpoint-begin__
from ray.rllib.algorithms.algorithm import Algorithm

# Use the Algorithm's `from_checkpoint` utility to get a new algo instance
# with exactly the same state as the one the checkpoint was created from:
my_new_ppo = Algorithm.from_checkpoint(path_to_checkpoint)

# Continue training.
my_new_ppo.train()

# __restore-from-algo-checkpoint-end__

my_new_ppo.stop()

# __restore-from-algo-checkpoint-2-begin__
# Re-build a fresh algorithm.
my_new_ppo = my_ppo_config.build()

# Restore the old (checkpointed) state.
my_new_ppo.restore(save_result)

# Continue training.
my_new_ppo.train()

# __restore-from-algo-checkpoint-2-end__

my_new_ppo.stop()

# __multi-agent-checkpoints-begin__
import os

@@ -235,95 +175,3 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):
algo_w_2_policies.stop()

# __restore-algorithm-from-checkpoint-with-fewer-policies-end__


# __export-models-begin__
from ray.rllib.algorithms.ppo import PPOConfig

# Create a new Algorithm (which contains a Policy, which contains an NN Model).
# Switch on inclusion of native model files in the Policy checkpoints.
ppo_config = (
PPOConfig()
.api_stack(
enable_rl_module_and_learner=False,
enable_env_runner_and_connector_v2=False,
)
.environment("Pendulum-v1")
.checkpointing(export_native_model_files=True)
)

# The default framework is TensorFlow, but if you would like to do this example with
# PyTorch, uncomment the following line of code:
# ppo_config.framework("torch")

# Create the Algorithm and train one iteration.
ppo = ppo_config.build()
ppo.train()

# Get the underlying PPOTF1Policy (or PPOTorchPolicy) object.
ppo_policy = ppo.get_policy()

# __export-models-end__

# Export the Keras NN model (that our PPOTF1Policy inside the PPO Algorithm uses)
# to disk ...

# 1) .. using the Policy object:

# __export-models-1-begin__
ppo_policy.export_model("/tmp/my_nn_model")
# .. check /tmp/my_nn_model/ for the model files.

# For Keras, you should be able to recover the model via:
# keras_model = tf.saved_model.load("/tmp/my_nn_model/")
# And pass in a Pendulum-v1 observation:
# results = keras_model(tf.convert_to_tensor(
# np.array([[0.0, 0.1, 0.2]]), dtype=np.float32)
# )

# For PyTorch, do:
# pytorch_model = torch.load("/tmp/my_nn_model/model.pt")
# results = pytorch_model(
# input_dict={
# "obs": torch.from_numpy(np.array([[0.0, 0.1, 0.2]], dtype=np.float32)),
# },
# state=[torch.tensor(0)], # dummy value
# seq_lens=torch.tensor(0), # dummy value
# )

# __export-models-1-end__

# 2) .. via the Policy's checkpointing method:

# __export-models-2-begin__
checkpoint_dir = ppo_policy.export_checkpoint("/tmp/ppo_policy")
# .. check /tmp/ppo_policy/model/ for the model files.
# You should be able to recover the Keras model via:
# keras_model = tf.saved_model.load("/tmp/ppo_policy/model")
# And pass in a Pendulum-v1 observation:
# results = keras_model(tf.convert_to_tensor(
# np.array([[0.0, 0.1, 0.2]]), dtype=np.float32)
# )

# __export-models-2-end__

# 3) .. via the Algorithm (Policy) checkpoint:

# __export-models-3-begin__
checkpoint_dir = ppo.save().checkpoint.path
# .. check `checkpoint_dir` for the Algorithm checkpoint files.
# For Keras, you should be able to recover the model via:
# keras_model = tf.saved_model.load(checkpoint_dir + "/policies/default_policy/model/")
# And pass in a Pendulum-v1 observation:
# results = keras_model(tf.convert_to_tensor(
# np.array([[0.0, 0.1, 0.2]]), dtype=np.float32)
# )

# __export-models-3-end__


# __export-models-as-onnx-begin__
# Using the same Policy object, we can also export our NN model in the ONNX format.
# The `onnx` arg takes an ONNX opset version; a falsy value disables ONNX export.
ppo_policy.export_model("/tmp/my_nn_model", onnx=11)

# __export-models-as-onnx-end__
41 changes: 0 additions & 41 deletions doc/source/rllib/doc_code/checkpoints.py

This file was deleted.

2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-callback.rst
@@ -239,7 +239,7 @@
The following example demonstrates how to implement a simple custom function writing buffer
contents to disk from time to time.

You normally don't want to write the contents of buffers along with your
-:ref:`Algorithm checkpoints <rllib-checkpointing-docs>`, so doing this less often, in a more
+:ref:`Algorithm checkpoints <rllib-checkpoints-docs>`, so doing this less often, in a more
controlled fashion through a custom callback could be a good compromise.

.. testcode::
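The testcode body is collapsed in this view. Purely as an illustration of the idea (this is not the PR's snippet; the buffer attribute and hook choice are assumptions), such a callback could look like:

import os
import pickle

from ray.rllib.algorithms.callbacks import DefaultCallbacks


class DumpBufferCallback(DefaultCallbacks):
    """Writes (assumed) replay-buffer contents to disk every 10th iteration."""

    def on_train_result(self, *, algorithm, result, **kwargs):
        iteration = result["training_iteration"]
        if iteration % 10 == 0:
            # `local_replay_buffer` is an assumption here; the actual
            # attribute depends on the algorithm and API stack in use.
            buffer = getattr(algorithm, "local_replay_buffer", None)
            if buffer is not None:
                path = os.path.join("/tmp", f"buffer_iter_{iteration}.pkl")
                with open(path, "wb") as f:
                    pickle.dump(buffer.get_state(), f)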
2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-fault-tolerance.rst
@@ -65,7 +65,7 @@ Fault Tolerance and Recovery Provided by Ray Tune
Ray Tune provides fault tolerance and recovery at the experiment trial level.

When using Ray Tune with RLlib, you can enable
-:ref:`periodic checkpointing <rllib-checkpointing-docs>`,
+:ref:`periodic checkpointing <rllib-checkpoints-docs>`,
which saves the state of the experiment to a user-specified persistent storage location.
If a trial fails, Ray Tune will automatically restart it from the latest
:ref:`checkpointed <tune-fault-tol>` state.
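To make the paragraph concrete, enabling the periodic checkpointing it refers to could look roughly like this sketch (standard Tune/RLlib APIs; the specific settings are illustrative, not taken from this PR):

from ray import train, tune
from ray.rllib.algorithms.ppo import PPOConfig

config = PPOConfig().environment("CartPole-v1")

tuner = tune.Tuner(
    "PPO",
    param_space=config,
    run_config=train.RunConfig(
        storage_path="/tmp/rllib_results",  # user-specified persistent storage
        # Save a trial checkpoint every 10 training iterations.
        checkpoint_config=train.CheckpointConfig(checkpoint_frequency=10),
        # Let Tune restart a failed trial (from the latest checkpoint) up to twice.
        failure_config=train.FailureConfig(max_failures=2),
    ),
)
tuner.fit()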
7 changes: 4 additions & 3 deletions doc/source/rllib/rllib-rlmodule.rst
@@ -211,9 +211,10 @@ The most direct way to construct your :py:class:`~ray.rllib.core.rl_module.rl_module.RLModule`


.. note::
-If you have a checkpoint from an :py:class:`~ray.rllib.algorithms.algorithm.Algorithm` or an individual
+If you have a checkpoint of an :py:class:`~ray.rllib.algorithms.algorithm.Algorithm` or an individual
:py:class:`~ray.rllib.core.rl_module.rl_module.RLModule`,
-see :ref:`Checkpointing RLModules <rllib-checkpointing-rl-modules-docs>` for how to create the stored RLModule instance from disk.
+see :ref:`Creating instances with from_checkpoint <rllib-checkpoints-from-checkpoint>` for how to recreate your
+:py:class:`~ray.rllib.core.rl_module.rl_module.RLModule` from disk.
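For reference, recreating just the RLModule from an Algorithm checkpoint might look like this sketch (the subdirectory layout is an assumption based on the new API stack's component structure):

from pathlib import Path

from ray.rllib.core.rl_module.rl_module import RLModule

# Assumed component layout inside an Algorithm checkpoint directory:
# <ckpt>/learner_group/learner/rl_module/<module_id>
module = RLModule.from_checkpoint(
    Path("/tmp/my_algo_checkpoint")
    / "learner_group" / "learner" / "rl_module" / "default_policy"
)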


Construction through RLModuleSpecs
@@ -715,7 +716,7 @@ model hyper-parameters:
would take turns updating the same shared encoder, which would lead to learning instabilities.


-.. _rllib-checkpointing-rl-modules-docs:
+.. _rllib-checkpoints-rl-modules-docs:

Checkpointing RLModules
-----------------------