Skip to content

Commit

Permalink
minor update to SAC: support DM_control training
Browse files Browse the repository at this point in the history
Former-commit-id: 8b2f96b
  • Loading branch information
zuoxingdong committed May 13, 2019
1 parent 90c45bc commit 75b6e6f
Show file tree
Hide file tree
Showing 126 changed files with 218 additions and 162 deletions.
96 changes: 96 additions & 0 deletions baselines/sac/experiment_dm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import os
from pathlib import Path

import gym
from gym.wrappers import FlattenDictWrapper

from dm_control import suite
from dm2gym import DMControlEnv

from lagom.utils import pickle_dump
from lagom.utils import set_global_seeds
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import Sample
from lagom.experiment import Condition
from lagom.experiment import run_experiment
from lagom.envs import make_vec_env
from lagom.envs.wrappers import TimeLimit
from lagom.envs.wrappers import NormalizeAction
from lagom.envs.wrappers import VecMonitor
from lagom.envs.wrappers import VecStepInfo

from baselines.sac.agent import Agent
from baselines.sac.engine import Engine
from baselines.sac.replay_buffer import ReplayBuffer


# Experiment configuration for SAC on DeepMind Control Suite tasks.
# NOTE(review): `Grid` presumably enumerates one configuration per listed
# value and `Condition` presumably derives its value from the rest of that
# configuration -- confirm against lagom.experiment.
config = Config(
    {'log.freq': 1000,  # every n timesteps
     'checkpoint.num': 3,

     # each ID is a (domain_name, task_name) pair; make_env unpacks it for suite.load
     'env.id': Grid([('cheetah', 'run'), ('hopper', 'hop'), ('walker', 'run'), ('fish', 'upright')]),

     'agent.gamma': 0.99,
     'agent.polyak': 0.995,  # polyak averaging coefficient for targets update
     'agent.actor.lr': 3e-4,
     'agent.actor.use_lr_scheduler': False,
     'agent.critic.lr': 3e-4,
     'agent.critic.use_lr_scheduler': False,
     'agent.initial_temperature': 1.0,
     'agent.max_grad_norm': 999999,  # grad clipping by norm

     'replay.capacity': 1000000,
     # number of time steps to take uniform actions initially
     # 'env.id' is a (domain, task) tuple here, so match on the domain name.
     # The previous check against the MuJoCo IDs 'Hopper-v3'/'Walker2d-v3'
     # could never be true for a tuple and always fell through to 10000.
     'replay.init_size': Condition(lambda x: 1000 if x['env.id'][0] in ['hopper', 'walker'] else 10000),
     'replay.batch_size': 256,

     'train.timestep': int(1e6),  # total number of training (environmental) timesteps
     'eval.freq': 5000,
     'eval.num_episode': 10

     })


def make_env(config, seed):
    """Create the vectorized environment for the configured DM Control task.

    Args:
        config: experiment configuration; ``config['env.id']`` is unpacked
            into a ``(domain_name, task_name)`` pair.
        seed: seed forwarded to ``make_vec_env``.

    Returns:
        Whatever ``make_vec_env`` returns for a single environment factory.
    """
    def _make_env():
        # Factory handed to make_vec_env; builds one fully wrapped environment.
        domain, task = config['env.id']
        dm_env = suite.load(domain, task, environment_kwargs={'flat_observation': True})
        gym_env = DMControlEnv(dm_env)
        # keep only the 'observations' entry of the dict observation
        flat_env = FlattenDictWrapper(gym_env, ['observations'])
        # episode horizon is read from the wrapped env's spec
        timed_env = TimeLimit(flat_env, flat_env.spec.max_episode_steps)
        return NormalizeAction(timed_env)

    return make_vec_env(_make_env, 1, seed)  # single environment


def run(config, seed, device, logdir):
    """Run one SAC training job and pickle its training/evaluation logs.

    Args:
        config: experiment configuration passed to the env factory, agent
            and engine.
        seed: seed for ``set_global_seeds`` and for both environments.
        device: device handed to the agent and the replay buffer.
        logdir: log directory; must support the ``/`` operator, since the
            output paths are built as ``logdir/'train_logs'`` and
            ``logdir/'eval_logs'``.

    Returns:
        None.
    """
    set_global_seeds(seed)

    # Training env gets both monitoring wrappers; evaluation env only VecMonitor.
    train_env = VecStepInfo(VecMonitor(make_env(config, seed)))
    eval_env = VecMonitor(make_env(config, seed))

    agent = Agent(config, train_env, device)
    replay = ReplayBuffer(train_env, config['replay.capacity'], device)
    engine = Engine(config, agent=agent, env=train_env, eval_env=eval_env, replay=replay, logdir=logdir)

    train_logs, eval_logs = engine.train()
    for name, logs in (('train_logs', train_logs), ('eval_logs', eval_logs)):
        pickle_dump(obj=logs, f=logdir/name, ext='.pkl')
    return None


if __name__ == '__main__':
    # Script entry point: hand the run function, the configuration and the
    # fixed seed list to lagom's experiment runner.
    seeds = [4153361530, 3503522377, 2876994566, 172236777, 3949341511, 849059707]
    num_workers = os.cpu_count()
    run_experiment(
        run=run,
        config=config,
        seeds=seeds,
        log_dir='logs/default_dm',
        max_workers=num_workers,
        chunksize=1,
        use_gpu=True,  # GPU much faster, note that performance differs between CPU/GPU
        gpu_ids=None,
    )
Original file line number Diff line number Diff line change
@@ -1 +1 @@
6cc3e664911b2110a1a1a852ff98685a576f14c2
880425e65efa463ca71ea1ec05a320d7533595ec
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ec66e8e1a080102203977007aa922fe80164c9c3
1688786bd50c037181a71bd7a85b753c0fe59c1a
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2f0358c80c583d7ef3280aee941a092981b84f60
d3485c353d76786a3a51e176211b401b504757a5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
f6257be7f6a09ffb69041693355906c8ec7b832d
6aef052f25dcb8e98e4f4eb08eb58a0160714f87
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ddc3cba873a626db196eb5c8fe4d23e3cf5a28bc
6eb36fd07473726d1708a641b15607a20b84e1c3
Original file line number Diff line number Diff line change
@@ -1 +1 @@
d7ca60092a443d71f89e2440acfea0b8d36d9cfd
7541ced339879d0f01d8d87d17fd94b84bd66740
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2e3ba16469c39b6e08466a5377970b5714c55548
ded16da85d874798c06856c82652874fce7a9942
Original file line number Diff line number Diff line change
@@ -1 +1 @@
b7310ac86aaf1eb6c0fe608d60c7ee57de6bcbdb
8cd1d776149e5050aa26f08b032ba63df7bbc716
Original file line number Diff line number Diff line change
@@ -1 +1 @@
69b01dc32d8c8f497d50537ac5e704fb9bd31271
fe27d17ce12e374f77991d8c40f6fc34d31b7ec7
Original file line number Diff line number Diff line change
@@ -1 +1 @@
65fc77f3f84ee875aaf5effd529024feb05d2034
55a75df78f53b6ca4d79c7e09dfc8138aef46edd
Original file line number Diff line number Diff line change
@@ -1 +1 @@
874c444538dc8c87a79e3a796ddbaf9280105bad
f351bedd0153560cd234eb9cd6d86d3948eaff86
Original file line number Diff line number Diff line change
@@ -1 +1 @@
d999b1e0bb6f31932adbaec412e2c521b3fec9c5
24821f03b6c1a67bde0d21a8cc22af6861784c01
Original file line number Diff line number Diff line change
@@ -1 +1 @@
9a8e134376be7763301c1e829bcdf3e57cb19bf4
a05da24c7d9577ab6634358353cbceb9059cb523
Original file line number Diff line number Diff line change
@@ -1 +1 @@
dba5cf11b80b37e3e1736d6ac7d3987708b229cc
8a91924ea7850c1fcff84ca39d586a0f2d6fb016
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5a71797e7337abfeea9ef13d1dedaf30b345600f
21dcdafc315c92690069590772ed57bcf159e8d5

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
1b24dd149ced16e28a9eab0bab9f2af2e54aa7f2
38e3051b9715b703d19bcaeac52e2418020526b5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
74363c0d02b9371e908dfc8ca625fc933771ef7d
8a01f4d4e642a2e6563ffbbdaf3d68a841aeead5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
e1fc870d435fbe08194a427e17dc18fafa2a06f9
887fe70e1d4da193f0a0d9cf5da571fde0179569
3 changes: 0 additions & 3 deletions baselines/sac/logs/default/0/config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
ID: 0
cuda: true
log.dir: logs/default
log.freq: 1000
checkpoint.num: 3
agent.gamma: 0.99
Expand All @@ -9,7 +7,6 @@ agent.actor.lr: 0.0003
agent.actor.use_lr_scheduler: false
agent.critic.lr: 0.0003
agent.critic.use_lr_scheduler: false
agent.policy_delay: 1
agent.initial_temperature: 1.0
agent.max_grad_norm: 999999
replay.capacity: 1000000
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
141070d8f2b1f12b7032bdd9fe890ffe0b668396
c0a094d58dad40a8e34589fc9831afe417232189

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
377294c8b722f8d2d84272a3e26b6979dca57fdf

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
45b660d2c3a3f686d25d5dc3ab10028a33d4347f
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
13561ac0c67369e709b119b6623ce076ef5d7c55
Original file line number Diff line number Diff line change
@@ -1 +1 @@
b89c5001b44ecc9ea95a35bd17a338be63f180c4
edcf36c6cedfee8f86d91b2b13669e8b47b44e01

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1eb98e6350b1a990a442ac2b4698920ad4f7b1b4

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
67dfc9aa574ce29c0d3a4a7a336aca391fe9e827
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4321f40c5e70a16c5875f7ba12c03246db578b43
Original file line number Diff line number Diff line change
@@ -1 +1 @@
95204667b15bc2a16dfa6cc9d791e57f60aabfbd
d2cd8ca81e572de7708c2d7c3754a216efbb2608

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7ae5ede7b92fac56863730d6936d5d280ec2585f
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
993a71c653947d1ddad7ac525d54f42b82affa40

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
8602b917ea9f4626bd847c0e1799d4a99a87c120
1174156146d574a47b257b40c9766d417596e6f9
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6bee5b530f12ccd6e65e64e95c958a32d847e006

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0c114bb9c3c77c59e70a49276d26565d66248cc0
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ff148b52e5b8e0ebecab8da98f6e7da1f8e76fd1
090b1136b142c827c9905a093074567588a780ff
Original file line number Diff line number Diff line change
@@ -1 +1 @@
bcc0a3e554d432ae8289aeef27e90adc035ccfcd
65e7f331c69a8f506a559b4de6584281a3d95064

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3897fbeb3eef276e109f057c5cfc488f7d2547b8

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ce3024c27f9695d0823a2f8d70ecf277918b7078

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
b339475ac64009935e49dd10714bc94ac8993ace
363d6c88db37719c96c3f861edf5f9583621f16a
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6f9d7f03c9c4442859bebf34ea5759a024fcd70a
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
c9152b5302c9404e8edb273e1731e5300d2b2bac

This file was deleted.

This file was deleted.

This file was deleted.

3 changes: 0 additions & 3 deletions baselines/sac/logs/default/1/config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
ID: 1
cuda: true
log.dir: logs/default
log.freq: 1000
checkpoint.num: 3
agent.gamma: 0.99
Expand All @@ -9,7 +7,6 @@ agent.actor.lr: 0.0003
agent.actor.use_lr_scheduler: false
agent.critic.lr: 0.0003
agent.critic.use_lr_scheduler: false
agent.policy_delay: 1
agent.initial_temperature: 1.0
agent.max_grad_norm: 999999
replay.capacity: 1000000
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
f22c729553cd0ff8409dc242b393f46aba4abaa1
42d50ed3493f8dff1fe8d3bba1332a41bdf67de3

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
353d14bf48233d88fedf9e871a62cc9d47342111

This file was deleted.

Loading

0 comments on commit 75b6e6f

Please sign in to comment.