Skip to content

Commit

Permalink
[Algo] Update offpolicy examples (pytorch#1206)
Browse files — browse the repository at this point in the history
Co-authored-by: vmoens <vincentmoens@gmail.com>
  • Loading branch information
BY571 and vmoens authored Jun 3, 2023
1 parent f59d02e commit e955cfc
Show file tree
Hide file tree
Showing 29 changed files with 1,602 additions and 2,802 deletions.
1 change: 1 addition & 0 deletions .circleci/unittest/linux_examples/scripts/run_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set -e
filename=".circleci/unittest/linux_examples/scripts/run_test.sh"
start_line=29
script=$(tail -n +$start_line "$filename")
+script="set -e"$'\n'"$script"

# Replace "cuda:0" with "cpu"
script="${script//cuda:0/cpu}"
Expand Down
260 changes: 136 additions & 124 deletions .circleci/unittest/linux_examples/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,97 +29,103 @@ python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20

# With batched environments
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
# env.num_envs=1 \
# collector.total_frames=48 \
# collector.frames_per_batch=16 \
# collector.collector_device=cuda:0 \
# optim.device=cuda:0 \
# loss.mini_batch_size=10 \
# loss.ppo_epochs=1 \
# logger.backend= \
# logger.log_interval=4 \
# optim.lr_scheduler=False
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
# total_frames=48 \
# init_random_frames=10 \
# batch_size=10 \
# frames_per_batch=16 \
# num_workers=4 \
# env_per_collector=2 \
# collector_device=cuda:0 \
# optim_steps_per_batch=1 \
# record_video=True \
# record_frames=4 \
# buffer_size=120
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
# env.num_envs=1 \
# collector.total_frames=48 \
# collector.frames_per_batch=16 \
# collector.collector_device=cuda:0 \
# logger.backend= \
# logger.log_interval=4 \
# optim.lr_scheduler=False \
# optim.device=cuda:0
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
# total_frames=48 \
# init_random_frames=10 \
# batch_size=10 \
# frames_per_batch=16 \
# num_workers=4 \
# env_per_collector=2 \
# collector_device=cuda:0 \
# optim_steps_per_batch=1 \
# record_video=True \
# record_frames=4 \
# buffer_size=120
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py \
# total_frames=48 \
# init_random_frames=10 \
# batch_size=10 \
# frames_per_batch=16 \
# num_workers=4 \
# env_per_collector=2 \
# collector_device=cuda:0 \
# optim_steps_per_batch=1 \
# record_video=True \
# record_frames=4 \
# buffer_size=120
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
# total_frames=48 \
# init_random_frames=10 \
# batch_size=10 \
# frames_per_batch=16 \
# num_workers=4 \
# env_per_collector=2 \
# collector_device=cuda:0 \
# optim_steps_per_batch=1 \
# record_video=True \
# record_frames=4 \
# buffer_size=120
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/dreamer.py \
# total_frames=200 \
# init_random_frames=10 \
# batch_size=10 \
# frames_per_batch=200 \
# num_workers=4 \
# env_per_collector=2 \
# collector_device=cuda:0 \
# optim_steps_per_batch=1 \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
env.num_envs=1 \
collector.total_frames=48 \
collector.frames_per_batch=16 \
collector.collector_device=cuda:0 \
optim.device=cuda:0 \
loss.mini_batch_size=10 \
loss.ppo_epochs=1 \
logger.backend= \
logger.log_interval=4 \
optim.lr_scheduler=False
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
optimization.batch_size=10 \
collector.frames_per_batch=16 \
collector.num_workers=4 \
collector.env_per_collector=2 \
collector.collector_device=cuda:0 \
optimization.utd_ratio=1 \
replay_buffer.size=120 \
env.name=Pendulum-v1 \
logger.backend=
# record_video=True \
# record_frames=4 \
# buffer_size=120 \
# rssm_hidden_dim=17
python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
env.num_envs=1 \
collector.total_frames=48 \
collector.frames_per_batch=16 \
collector.collector_device=cuda:0 \
logger.backend= \
logger.log_interval=4 \
optim.lr_scheduler=False \
optim.device=cuda:0
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_device=cuda:0 \
device=cuda:0 \
mode=offline
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120
python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120
python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
collector.frames_per_batch=16 \
collector.num_workers=4 \
collector.env_per_collector=2 \
collector.collector_device=cuda:0 \
optimization.batch_size=10 \
optimization.utd_ratio=1 \
replay_buffer.size=120 \
env.name=Pendulum-v1 \
logger.backend=
# logger.record_video=True \
# logger.record_frames=4 \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/dreamer.py \
total_frames=200 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=200 \
num_workers=4 \
env_per_collector=2 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120 \
rssm_hidden_dim=17
python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
optimization.batch_size=10 \
collector.frames_per_batch=16 \
collector.num_workers=4 \
collector.env_per_collector=2 \
collector.collector_device=cuda:0 \
network.device=cuda:0 \
logger.mode=offline \
env.name=Pendulum-v1 \
logger.backend=
python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_online.py \
total_frames=48 \
batch_size=10 \
Expand All @@ -131,18 +137,33 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_onli
mode=offline

# With single envs
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
total_frames=48 \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/dreamer.py \
total_frames=200 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
frames_per_batch=200 \
num_workers=2 \
env_per_collector=1 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120
buffer_size=120 \
rssm_hidden_dim=17
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
optimization.batch_size=10 \
collector.frames_per_batch=16 \
collector.num_workers=2 \
collector.env_per_collector=1 \
collector.collector_device=cuda:0 \
optimization.utd_ratio=1 \
replay_buffer.size=120 \
env.name=Pendulum-v1 \
logger.backend=
# record_video=True \
# record_frames=4 \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
env.num_envs=1 \
collector.total_frames=48 \
Expand All @@ -164,30 +185,6 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
record_video=True \
record_frames=4 \
buffer_size=120
python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120
python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
env.num_envs=1 \
collector.total_frames=48 \
Expand All @@ -199,28 +196,32 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
logger.backend= \
logger.log_interval=4 \
optim.lr_scheduler=False
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/dreamer.py \
total_frames=200 \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=200 \
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120 \
rssm_hidden_dim=17
python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
mode=offline \
collector_device=cuda:0
buffer_size=120
python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
collector.frames_per_batch=16 \
collector.num_workers=2 \
collector.env_per_collector=1 \
collector.collector_device=cuda:0 \
optimization.batch_size=10 \
optimization.utd_ratio=1 \
replay_buffer.size=120 \
env.name=Pendulum-v1 \
logger.backend=
# record_video=True \
# record_frames=4 \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_online.py \
total_frames=48 \
batch_size=10 \
Expand All @@ -230,6 +231,17 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_onli
mode=offline \
device=cuda:0 \
collector_device=cuda:0
python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
optimization.batch_size=10 \
collector.frames_per_batch=16 \
collector.num_workers=2 \
collector.env_per_collector=1 \
logger.mode=offline \
collector.collector_device=cuda:0 \
env.name=Pendulum-v1 \
logger.backend=

python .circleci/unittest/helpers/coverage_run_parallel.py examples/bandits/dqn.py --n_steps=100

Expand Down
9 changes: 0 additions & 9 deletions docs/source/reference/trainers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -192,21 +192,12 @@ Builders
:toctree: generated/
:template: rl_template_fun.rst

make_a2c_loss
make_a2c_model
make_collector_offpolicy
make_collector_onpolicy
make_ddpg_actor
make_ddpg_loss
make_dqn_actor
make_dqn_loss
make_ppo_loss
make_ppo_model
make_redq_loss
make_redq_model
make_replay_buffer
make_sac_loss
make_sac_model
make_target_updater
make_trainer
parallel_env_constructor
Expand Down
2 changes: 1 addition & 1 deletion examples/a2c/a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import hydra


-@hydra.main(config_path=".", config_name="config")
+@hydra.main(config_path=".", config_name="config", version_base="1.1")
def main(cfg: "DictConfig"): # noqa: F821

import torch
Expand Down
Loading

0 comments on commit e955cfc

Please sign in to comment.