Skip to content

Commit

Permalink
[CI, BugFix] Fix CI warnings and errors (#1100)
Browse files Browse the repository at this point in the history
  • Loading branch information
vmoens authored Apr 27, 2023
1 parent b871a8e commit 8d8256a
Show file tree
Hide file tree
Showing 44 changed files with 287 additions and 156 deletions.
19 changes: 19 additions & 0 deletions .circleci/unittest/linux_examples/scripts/run_local.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# Run the example commands from run_test.sh locally, on CPU and without the
# coverage tooling.
#
# The script takes everything from line 29 of run_test.sh onwards, rewrites
# that text (see the substitutions below) and feeds the result to a child
# bash. The input path is relative, so this must be started from the
# directory that contains .circleci (presumably the repository root).

set -euo pipefail

# Read script from line 29
readonly filename=".circleci/unittest/linux_examples/scripts/run_test.sh"
readonly start_line=29

# Fail with an actionable message instead of a bare `tail` error.
if [[ ! -r "$filename" ]]; then
  printf 'error: cannot read %s (wrong working directory?)\n' "$filename" >&2
  exit 1
fi

script=$(tail -n "+${start_line}" "$filename")

# Replace "cuda:0" with "cpu"
script="${script//cuda:0/cpu}"

# Remove any instances of ".circleci/unittest/helpers/coverage_run_parallel.py"
# so each example is launched by plain `python`, and drop the coverage
# post-processing commands.
script="${script//.circleci\/unittest\/helpers\/coverage_run_parallel.py}"
script="${script//coverage combine}"
script="${script//coverage xml -i}"

# Execute the modified script.
# -e makes the child stop at the first failing command so the failure reaches
# this script's exit status; without it (and unless the extracted text turns
# errexit on itself) only the status of the last command would be reported.
# printf is used instead of echo so the text is passed through verbatim.
printf '%s\n' "$script" | bash -e
42 changes: 24 additions & 18 deletions .circleci/unittest/linux_examples/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -47,15 +47,16 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
collector.collector_device=cuda:0 \
logger.backend= \
logger.log_interval=4 \
optim.lr_scheduler=False
optim.lr_scheduler=False \
optim.device=cuda:0
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -67,7 +68,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -79,7 +80,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -89,6 +90,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
collector.total_frames=48 \
collector.frames_per_batch=16 \
collector.collector_device=cuda:0 \
optim.device=cuda:0 \
loss.mini_batch_size=10 \
loss.ppo_epochs=1 \
logger.backend= \
Expand All @@ -101,7 +103,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/drea
frames_per_batch=200 \
num_workers=4 \
env_per_collector=2 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -114,16 +116,17 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_devices=cuda:0 \
mode=offline
collector_device=cuda:0 \
mode=offline
python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_online.py \
total_frames=48 \
batch_size=10 \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_devices=cuda:0 \
mode=offline
collector_device=cuda:0 \
device=cuda:0 \
mode=offline

# With single envs
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
Expand All @@ -133,7 +136,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -145,15 +148,16 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
collector.collector_device=cuda:0 \
logger.backend= \
logger.log_interval=4 \
optim.lr_scheduler=False
optim.lr_scheduler=False \
optim.device=cuda:0
python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -165,7 +169,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -177,7 +181,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -187,6 +191,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
collector.total_frames=48 \
collector.frames_per_batch=16 \
collector.collector_device=cuda:0 \
optim.device=cuda:0 \
loss.mini_batch_size=10 \
loss.ppo_epochs=1 \
logger.backend= \
Expand All @@ -199,7 +204,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/drea
frames_per_batch=200 \
num_workers=2 \
env_per_collector=1 \
collector_devices=cuda:0 \
collector_device=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
Expand All @@ -213,15 +218,16 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/td3/td3.py \
num_workers=2 \
env_per_collector=1 \
mode=offline \
collector_devices=cuda:0
collector_device=cuda:0
python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_online.py \
total_frames=48 \
batch_size=10 \
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
mode=offline \
collector_devices=cuda:0
device=cuda:0 \
collector_device=cuda:0

python .circleci/unittest/helpers/coverage_run_parallel.py examples/bandits/dqn.py --n_steps=100

Expand Down
9 changes: 5 additions & 4 deletions examples/a2c/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
CatTensors,
DoubleToFloat,
EnvCreator,
ExplorationType,
GrayScale,
NoopResetEnv,
ObservationNorm,
Expand Down Expand Up @@ -261,7 +262,7 @@ def make_a2c_models(cfg):
value_operator=value_module,
)
actor = actor_critic.get_policy_operator()
critic = actor_critic.get_value_operator()
critic = actor_critic.get_value_head() # to avoid duplicate params
else:
actor = policy_module
critic = value_module
Expand Down Expand Up @@ -326,7 +327,7 @@ def make_a2c_modules_state(proof_environment):
distribution_class=distribution_class,
distribution_kwargs=distribution_kwargs,
return_log_prob=True,
default_interaction_mode="random",
default_interaction_type=ExplorationType.RANDOM,
)

# Define the value net
Expand Down Expand Up @@ -412,7 +413,7 @@ def make_a2c_modules_pixels(proof_environment):
distribution_class=distribution_class,
distribution_kwargs=distribution_kwargs,
return_log_prob=True,
default_interaction_mode="random",
default_interaction_type=ExplorationType.RANDOM,
)

# Define another head for the value
Expand Down Expand Up @@ -451,8 +452,8 @@ def make_loss(loss_cfg, actor_network, value_network):
entropy_coef=loss_cfg.entropy_coef,
critic_coef=loss_cfg.critic_coef,
entropy_bonus=True,
gamma=loss_cfg.gamma,
)
loss_module.make_value_estimator(gamma=loss_cfg.gamma)
return loss_module, advantage_module


Expand Down
5 changes: 3 additions & 2 deletions examples/bandits/dqn.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,16 @@
actor(env.reset())
loss = DistributionalDQNLoss(
actor,
gamma=0.0,
)
loss.make_value_estimator(gamma=0.9)
else:
model = MLP(
out_features=n_actions, depth=3, num_cells=n_cells, activation_class=nn.Tanh
)
actor = QValueActor(model, action_space="categorical")
actor(env.reset())
loss = DQNLoss(actor, gamma=0.0, loss_function="smooth_l1")
loss = DQNLoss(actor, loss_function="smooth_l1", action_space=env.action_spec)
loss.make_value_estimator(gamma=0.0)
policy = EGreedyWrapper(
actor, eps_greedy, 0.0, annealing_num_steps=n_steps, spec=env.action_spec
)
Expand Down
3 changes: 1 addition & 2 deletions examples/ddpg/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ init_random_frames: 25000
activation: elu
gSDE: 0
from_pixels: 0
#collector_devices: [cuda:1,cuda:1,cuda:1,cuda:1]
collector_devices: [cpu,cpu,cpu,cpu]
collector_device: cpu
env_per_collector: 8
num_workers: 32
lr_scheduler: ""
Expand Down
2 changes: 1 addition & 1 deletion examples/discrete_sac/discrete_sac.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,10 @@ def env_factory(num_workers):
qvalue_network=model[1],
num_actions=num_actions,
num_qvalue_nets=2,
gamma=cfg.gamma,
target_entropy_weight=cfg.target_entropy_weight,
loss_function="smooth_l1",
)
loss_module.make_value_estimator(gamma=cfg.gamma)

# Define Target Network Updater
target_net_updater = SoftUpdate(loss_module, cfg.target_update_polyak)
Expand Down
2 changes: 1 addition & 1 deletion examples/distributed/collectors/multi_nodes/ray_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@
entropy_coef=entropy_eps, # these keys match by default but we set this for completeness
value_target_key=advantage_module.value_target_key,
critic_coef=1.0,
gamma=0.99,
loss_critic_type="smooth_l1",
)
loss_module.make_value_estimator(gamma=0.99)

# 7. Define optimizer
optim = torch.optim.Adam(loss_module.parameters(), lr)
Expand Down
3 changes: 1 addition & 2 deletions examples/dqn/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ lr: 3e-4
multi_step: 1
init_random_frames: 25000
from_pixels: 1
#collector_devices: [cuda:1,cuda:1,cuda:1,cuda:1]
collector_devices: [cpu,cpu,cpu,cpu]
collector_device: cpu
env_per_collector: 8
num_workers: 32
lr_scheduler: ""
Expand Down
3 changes: 1 addition & 2 deletions examples/dreamer/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ from_pixels: True
# we want 50 frames / traj in the replay buffer. Given the frame_skip=2 this makes each traj 100 steps long
env_per_collector: 8
num_workers: 8
# collector_devices: [cuda:1]
collector_devices: cuda:1 # [cpu,cpu,cpu,cpu,cpu,cpu,cpu,cpu]
collector_device: cuda:1
frames_per_batch: 800
optim_steps_per_batch: 80
record_interval: 30
Expand Down
6 changes: 1 addition & 5 deletions examples/dreamer/dreamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,6 @@ def main(cfg: "DictConfig"): # noqa: F821
make_env=create_env_fn,
actor_model_explore=exploration_policy,
cfg=cfg,
# make_env_kwargs=[
# {"device": device}
# for device in cfg.collector_devices
# ],
)
print("collector:", collector)

Expand All @@ -190,7 +186,7 @@ def main(cfg: "DictConfig"): # noqa: F821
record_frames=cfg.record_frames,
frame_skip=cfg.frame_skip,
policy_exploration=policy,
recorder=make_recorder_env(
environment=make_recorder_env(
cfg=cfg,
video_tag=video_tag,
obs_norm_state_dict=obs_norm_state_dict,
Expand Down
10 changes: 5 additions & 5 deletions examples/dreamer/dreamer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,13 +195,13 @@ def make_transformed_env(**kwargs) -> TransformedEnv:
from_pixels = cfg.from_pixels

if custom_env is None and custom_env_maker is None:
if isinstance(cfg.collector_devices, str):
device = cfg.collector_devices
elif isinstance(cfg.collector_devices, Sequence):
device = cfg.collector_devices[0]
if isinstance(cfg.collector_device, str):
device = cfg.collector_device
elif isinstance(cfg.collector_device, Sequence):
device = cfg.collector_device[0]
else:
raise ValueError(
"collector_devices must be either a string or a sequence of strings"
"collector_device must be either a string or a sequence of strings"
)
env_kwargs = {
"env_name": env_name,
Expand Down
12 changes: 3 additions & 9 deletions examples/iql/iql_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,7 @@ def make_replay_buffer(
@hydra.main(version_base=None, config_path=".", config_name="online_config")
def main(cfg: "DictConfig"): # noqa: F821

device = (
torch.device("cuda:0")
if torch.cuda.is_available()
and torch.cuda.device_count() > 0
and cfg.device == "cuda:0"
else torch.device("cpu")
)
device = torch.device(cfg.device)

exp_name = generate_exp_name("Online_IQL", cfg.exp_name)
logger = get_logger(
Expand Down Expand Up @@ -199,11 +193,11 @@ def env_factory(num_workers):
qvalue_network=model[1],
value_network=model[2],
num_qvalue_nets=2,
gamma=cfg.gamma,
temperature=cfg.temperature,
expectile=cfg.expectile,
loss_function="smooth_l1",
)
loss_module.make_value_estimator(gamma=cfg.gamma)

# Define Target Network Updater
target_net_updater = SoftUpdate(loss_module, cfg.target_update_polyak)
Expand All @@ -216,7 +210,7 @@ def env_factory(num_workers):
frames_per_batch=cfg.frames_per_batch,
max_frames_per_traj=cfg.max_frames_per_traj,
total_frames=cfg.total_frames,
device=cfg.device,
device=cfg.collector_device,
)
collector.set_seed(cfg.seed)

Expand Down
3 changes: 1 addition & 2 deletions examples/iql/online_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ default_policy_scale: 1.0
scale_lb: 0.1
activation: elu
from_pixels: 0
#collector_devices: [cuda:1,cuda:1,cuda:1,cuda:1]
collector_devices: [cpu]
collector_device: cuda:0
env_per_collector: 5
frames_per_batch: 1000 # 5*200
max_frames_per_traj: 200
Expand Down
Loading

0 comments on commit 8d8256a

Please sign in to comment.