Skip to content

Commit

Permalink
[Minor] small fixes (pytorch#1237)
Browse files Browse the repository at this point in the history
  • Loading branch information
vmoens authored Jun 6, 2023
1 parent 9467036 commit 0961cb3
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 149 deletions.
286 changes: 143 additions & 143 deletions .circleci/unittest/linux_examples/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,153 +29,153 @@ python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20

# With batched environments
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
# env.num_envs=1 \
# env.device=cuda:0 \
# collector.total_frames=48 \
# collector.frames_per_batch=16 \
# collector.collector_device=cuda:0 \
# optim.device=cuda:0 \
# loss.mini_batch_size=10 \
# loss.ppo_epochs=1 \
# logger.backend= \
# logger.log_interval=4 \
# optim.lr_scheduler=False
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
# collector.total_frames=48 \
# collector.init_random_frames=10 \
# optimization.batch_size=10 \
# collector.frames_per_batch=16 \
# collector.num_workers=4 \
# collector.env_per_collector=2 \
# collector.collector_device=cuda:0 \
# network.device=cuda:0 \
# optimization.utd_ratio=1 \
# replay_buffer.size=120 \
# env.name=Pendulum-v1 \
# logger.backend=
## record_video=True \
## record_frames=4 \
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
# env.num_envs=1 \
# collector.total_frames=48 \
# collector.frames_per_batch=16 \
# collector.collector_device=cuda:0 \
# logger.backend= \
# logger.log_interval=4 \
# optim.lr_scheduler=False \
# optim.device=cuda:0
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
# total_frames=48 \
# init_random_frames=10 \
# batch_size=10 \
# frames_per_batch=16 \
# num_workers=4 \
# env_per_collector=2 \
# collector_device=cuda:0 \
# optim_steps_per_batch=1 \
# record_video=True \
# record_frames=4 \
# buffer_size=120
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py \
# total_frames=48 \
# init_random_frames=10 \
# batch_size=10 \
# frames_per_batch=16 \
# num_workers=4 \
# env_per_collector=2 \
# collector_device=cuda:0 \
# optim_steps_per_batch=1 \
# record_video=True \
# record_frames=4 \
# buffer_size=120
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
# collector.total_frames=48 \
# collector.init_random_frames=10 \
# collector.frames_per_batch=16 \
# collector.num_workers=4 \
# collector.env_per_collector=2 \
# collector.collector_device=cuda:0 \
# optimization.batch_size=10 \
# optimization.utd_ratio=1 \
# replay_buffer.size=120 \
# env.name=Pendulum-v1 \
# logger.backend=
## logger.record_video=True \
## logger.record_frames=4 \
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/dreamer.py \
# total_frames=200 \
# init_random_frames=10 \
# batch_size=10 \
# frames_per_batch=200 \
# num_workers=4 \
# env_per_collector=2 \
# collector_device=cuda:0 \
# optim_steps_per_batch=1 \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
env.num_envs=1 \
env.device=cuda:0 \
collector.total_frames=48 \
collector.frames_per_batch=16 \
collector.collector_device=cuda:0 \
optim.device=cuda:0 \
loss.mini_batch_size=10 \
loss.ppo_epochs=1 \
logger.backend= \
logger.log_interval=4 \
optim.lr_scheduler=False
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
optimization.batch_size=10 \
collector.frames_per_batch=16 \
collector.num_workers=4 \
collector.env_per_collector=2 \
collector.collector_device=cuda:0 \
network.device=cuda:0 \
optimization.utd_ratio=1 \
replay_buffer.size=120 \
env.name=Pendulum-v1 \
logger.backend=
# record_video=True \
# record_frames=4 \
# buffer_size=120 \
# rssm_hidden_dim=17
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
# collector.total_frames=48 \
# collector.init_random_frames=10 \
# optimization.batch_size=10 \
# collector.frames_per_batch=16 \
# collector.num_workers=4 \
# collector.env_per_collector=2 \
# collector.collector_device=cuda:0 \
# network.device=cuda:0 \
# logger.mode=offline \
# env.name=Pendulum-v1 \
# logger.backend=
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_online.py \
# total_frames=48 \
# batch_size=10 \
# frames_per_batch=16 \
# num_workers=4 \
# env_per_collector=2 \
# collector_device=cuda:0 \
# device=cuda:0 \
# mode=offline
#
## With single envs
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/dreamer.py \
# total_frames=200 \
# init_random_frames=10 \
# batch_size=10 \
# frames_per_batch=200 \
# num_workers=2 \
# env_per_collector=1 \
# collector_device=cuda:0 \
# optim_steps_per_batch=1 \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
env.num_envs=1 \
collector.total_frames=48 \
collector.frames_per_batch=16 \
collector.collector_device=cuda:0 \
logger.backend= \
logger.log_interval=4 \
optim.lr_scheduler=False \
optim.device=cuda:0
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120
python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120
python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
collector.frames_per_batch=16 \
collector.num_workers=4 \
collector.env_per_collector=2 \
collector.collector_device=cuda:0 \
optimization.batch_size=10 \
optimization.utd_ratio=1 \
replay_buffer.size=120 \
env.name=Pendulum-v1 \
logger.backend=
# logger.record_video=True \
# logger.record_frames=4 \
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/dreamer.py \
total_frames=200 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=200 \
num_workers=4 \
env_per_collector=2 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120 \
rssm_hidden_dim=17
python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
optimization.batch_size=10 \
collector.frames_per_batch=16 \
collector.num_workers=4 \
collector.env_per_collector=2 \
collector.collector_device=cuda:0 \
network.device=cuda:0 \
logger.mode=offline \
env.name=Pendulum-v1 \
logger.backend=
python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_online.py \
total_frames=48 \
batch_size=10 \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_device=cuda:0 \
device=cuda:0 \
mode=offline

# With single envs
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/dreamer.py \
total_frames=200 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=200 \
num_workers=2 \
env_per_collector=1 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120 \
rssm_hidden_dim=17
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
optimization.batch_size=10 \
collector.frames_per_batch=16 \
collector.num_workers=2 \
collector.env_per_collector=1 \
collector.collector_device=cuda:0 \
network.device=cuda:0 \
optimization.utd_ratio=1 \
replay_buffer.size=120 \
env.name=Pendulum-v1 \
logger.backend=
# record_video=True \
# record_frames=4 \
# buffer_size=120 \
# rssm_hidden_dim=17
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
# collector.total_frames=48 \
# collector.init_random_frames=10 \
# optimization.batch_size=10 \
# collector.frames_per_batch=16 \
# collector.num_workers=2 \
# collector.env_per_collector=1 \
# collector.collector_device=cuda:0 \
# network.device=cuda:0 \
# optimization.utd_ratio=1 \
# replay_buffer.size=120 \
# env.name=Pendulum-v1 \
# logger.backend=
## record_video=True \
## record_frames=4 \
#python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
# env.num_envs=1 \
# collector.total_frames=48 \
# collector.frames_per_batch=16 \
# collector.collector_device=cuda:0 \
# logger.backend= \
# logger.log_interval=4 \
# optim.lr_scheduler=False \
# optim.device=cuda:0
python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
env.num_envs=1 \
collector.total_frames=48 \
collector.frames_per_batch=16 \
collector.collector_device=cuda:0 \
logger.backend= \
logger.log_interval=4 \
optim.lr_scheduler=False \
optim.device=cuda:0
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
total_frames=48 \
init_random_frames=10 \
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@

# TorchRL

-<div style="display: flex; justify-content: center;">
+<p align="center">
<img src="docs/source/_static/img/icon.png" width="200" >
-</div>
+</p>

[**Documentation**](#documentation-and-knowledge-base) | [**TensorDict**](#writing-simplified-and-portable-rl-codebase-with-tensordict) |
[**Features**](#features) | [**Examples, tutorials and demos**](#examples-tutorials-and-demos) | [**Citation**](#citation) | [**Installation**](#installation) |
Expand Down
Binary file modified docs/source/_static/img/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 0 additions & 2 deletions examples/ppo/ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ def main(cfg: "DictConfig"): # noqa: F821

model_device = cfg.optim.device
actor, critic, critic_head = make_ppo_models(cfg)
-print("actor", actor)
-print("critic", critic)

collector, state_dict = make_collector(cfg, policy=actor)
data_buffer = make_data_buffer(cfg)
Expand Down
1 change: 0 additions & 1 deletion examples/ppo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,6 @@ def init_stats(env, n_samples_stats, from_pixels):
),
keep_dims=(-1, -2, -3),
)
-print("stats:", t.loc, t.scale, t.loc.shape)
else:
t.init_stats(n_samples_stats)

Expand Down
1 change: 0 additions & 1 deletion torchrl/collectors/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,6 @@ def __init__(
else:
self.policy_weights = TensorDict({}, [])

-print("sending env to device", self.device)
self.env: EnvBase = self.env.to(self.device)
self.max_frames_per_traj = max_frames_per_traj
if self.max_frames_per_traj > 0:
Expand Down

0 comments on commit 0961cb3

Please sign in to comment.