
Commit

Finished PQN documentation by adding commands to run the benchmarks and plotting the results

roger-creus committed Nov 14, 2024
1 parent c8acf70 commit d9c300b
Showing 3 changed files with 113 additions and 0 deletions.
32 changes: 32 additions & 0 deletions benchmark/pqn.sh
@@ -0,0 +1,32 @@
poetry install
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
--command "poetry run python cleanrl/pqn.py --no_cuda --track" \
--num-seeds 3 \
--workers 9 \
--slurm-gpus-per-task 1 \
--slurm-ntasks 1 \
--slurm-total-cpus 10 \
--slurm-template-path benchmark/cleanrl_1gpu.slurm_template

poetry install -E envpool
poetry run python -m cleanrl_utils.benchmark \
--env-ids Breakout-v5 SpaceInvaders-v5 BeamRider-v5 Pong-v5 MsPacman-v5 \
--command "poetry run python cleanrl/pqn_atari_envpool.py --track" \
--num-seeds 3 \
--workers 9 \
--slurm-gpus-per-task 1 \
--slurm-ntasks 1 \
--slurm-total-cpus 10 \
--slurm-template-path benchmark/cleanrl_1gpu.slurm_template

poetry install -E envpool
poetry run python -m cleanrl_utils.benchmark \
--env-ids Breakout-v5 SpaceInvaders-v5 BeamRider-v5 Pong-v5 MsPacman-v5 \
--command "poetry run python cleanrl/pqn_atari_envpool_lstm.py --track" \
--num-seeds 3 \
--workers 9 \
--slurm-gpus-per-task 1 \
--slurm-ntasks 1 \
--slurm-total-cpus 10 \
--slurm-template-path benchmark/cleanrl_1gpu.slurm_template
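
The entries above assume a Slurm cluster (the `--slurm-*` flags) and W&B tracking via `--track`. For a quick local smoke test, a sketch that drops those options follows; it assumes `cleanrl_utils.benchmark` runs the command locally when no Slurm template is supplied.

```bash
# Local smoke-test sketch (assumption: without the --slurm-* flags the
# benchmark utility executes the command locally; --track is omitted so
# no W&B logging is required).
poetry install
poetry run python -m cleanrl_utils.benchmark \
    --env-ids CartPole-v1 \
    --command "poetry run python cleanrl/pqn.py --no_cuda" \
    --num-seeds 1 \
    --workers 1
```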
50 changes: 50 additions & 0 deletions benchmark/pqn_plot.sh
@@ -0,0 +1,50 @@

python -m openrlbenchmark.rlops \
--filters '?we=rogercreus&wpn=cleanRL&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
'pqn?tag=pr-472&cl=CleanRL PQN' \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
--no-check-empty-runs \
--pc.ncols 3 \
--pc.ncols-legend 2 \
--output-filename benchmark/cleanrl/pqn \
--scan-history

python -m openrlbenchmark.rlops \
--filters '?we=rogercreus&wpn=cleanRL&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
'pqn_atari_envpool?tag=pr-472&cl=CleanRL PQN' \
--env-ids Breakout-v5 SpaceInvaders-v5 BeamRider-v5 Pong-v5 MsPacman-v5 \
--no-check-empty-runs \
--pc.ncols 3 \
--pc.ncols-legend 3 \
--rliable \
--rc.score_normalization_method maxmin \
--rc.normalized_score_threshold 1.0 \
--rc.sample_efficiency_plots \
--rc.sample_efficiency_and_walltime_efficiency_method Median \
--rc.performance_profile_plots \
--rc.aggregate_metrics_plots \
--rc.sample_efficiency_num_bootstrap_reps 10 \
--rc.performance_profile_num_bootstrap_reps 10 \
--rc.interval_estimates_num_bootstrap_reps 10 \
--output-filename static/0compare \
--scan-history

python -m openrlbenchmark.rlops \
--filters '?we=rogercreus&wpn=cleanRL&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
'pqn_atari_envpool_lstm?tag=pr-472&cl=CleanRL PQN' \
--env-ids Breakout-v5 SpaceInvaders-v5 BeamRider-v5 MsPacman-v5 \
--no-check-empty-runs \
--pc.ncols 3 \
--pc.ncols-legend 3 \
--rliable \
--rc.score_normalization_method maxmin \
--rc.normalized_score_threshold 1.0 \
--rc.sample_efficiency_plots \
--rc.sample_efficiency_and_walltime_efficiency_method Median \
--rc.performance_profile_plots \
--rc.aggregate_metrics_plots \
--rc.sample_efficiency_num_bootstrap_reps 10 \
--rc.performance_profile_num_bootstrap_reps 10 \
--rc.interval_estimates_num_bootstrap_reps 10 \
--output-filename static/0compare \
--scan-history
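
In these plotting commands, the `--filters` string selects the runs: `we` is the W&B entity, `wpn` the project, and `metric` the y-axis series, while the quoted `pqn?tag=pr-472&cl=CleanRL PQN` part picks runs by experiment name and tag and sets the legend label. To plot runs logged under a different W&B account, only the entity should need to change; a sketch with a hypothetical `<your-wandb-entity>` placeholder:

```bash
# Sketch: the classic-control plot against your own W&B entity.
# <your-wandb-entity> is a placeholder, not a real account.
python -m openrlbenchmark.rlops \
    --filters '?we=<your-wandb-entity>&wpn=cleanRL&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
        'pqn?tag=pr-472&cl=CleanRL PQN' \
    --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
    --no-check-empty-runs \
    --pc.ncols 3 \
    --pc.ncols-legend 2 \
    --output-filename benchmark/cleanrl/pqn \
    --scan-history
```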
31 changes: 31 additions & 0 deletions docs/rl-algorithms/pqn.md
@@ -73,6 +73,12 @@ Running `python cleanrl/pqn.py` will automatically record various metrics such a

### Experiment results

To run benchmark experiments, see :material-github: [benchmark/pqn.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/pqn.sh). Specifically, execute the following command:

``` title="benchmark/pqn.sh" linenums="1"
--8<-- "benchmark/pqn.sh:0:6"
```

Episode Rewards:

| Environment | CleanRL PQN |
@@ -91,6 +97,10 @@ Runtime:

Learning curves:

``` title="benchmark/pqn_plot.sh" linenums="1"
--8<-- "benchmark/pqn_plot.sh:1:9"
```

<img src="../pqn/pqn_state.png">

Tracked experiments:
@@ -149,6 +159,12 @@ See [related docs](/rl-algorithms/pqn/#explanation-of-the-logged-metrics) for `p

### Experiment results

To run benchmark experiments, see :material-github: [benchmark/pqn.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/pqn.sh). Specifically, execute the following command:

``` title="benchmark/pqn.sh" linenums="1"
--8<-- "benchmark/pqn.sh:12:17"
```

Episode Rewards:

| Environment | CleanRL PQN |
@@ -172,6 +188,10 @@ Runtime:

Learning curves:

``` title="benchmark/pqn_plot.sh" linenums="1"
--8<-- "benchmark/pqn_plot.sh:11:29"
```

<img src="../pqn/pqn.png">

Tracked experiments:
@@ -239,6 +259,13 @@ To help test out the memory, we remove the 4 stacked frames from the observation

### Experiment results

To run benchmark experiments, see :material-github: [benchmark/pqn.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/pqn.sh). Specifically, execute the following command:

``` title="benchmark/pqn.sh" linenums="1"
--8<-- "benchmark/pqn.sh:23:28"
```


Episode Rewards:

| Environment | CleanRL PQN |
@@ -259,6 +286,10 @@ Runtime:

Learning curves:

``` title="benchmark/pqn_plot.sh" linenums="1"
--8<-- "benchmark/pqn_plot.sh:32:50"
```

<img src="../pqn/pqn_lstm.png">

Tracked experiments:

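Before submitting the full sweeps from `benchmark/pqn.sh`, a single training run is a quick way to sanity-check the install. A minimal sketch, assuming CleanRL's usual `--env-id` and `--seed` flags; add `--track` to log to W&B as the benchmark commands do:

```bash
# Single-run sanity check (sketch; assumes the standard CleanRL CLI flags).
poetry install -E envpool
poetry run python cleanrl/pqn_atari_envpool.py --env-id Breakout-v5 --seed 1
```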