bc_pendulum_conf.py (forked from HorizonRobotics/alf)

# Copyright (c) 2022 Horizon Robotics and ALF Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
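"""Behavior cloning (BC) on Pendulum from an offline replay buffer.

This conf trains ``BcAlgorithm`` purely on previously collected data;
online RL updates are disabled below by pushing
``rl_train_after_update_steps`` past ``num_iterations``.

A typical launch command (the ``--root_dir`` path is illustrative, not
part of this conf):

    python -m alf.bin.train --conf bc_pendulum_conf.py --root_dir ~/tmp/bc_pendulum
"""
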
from functools import partial
import torch
import alf
from alf.algorithms.agent import Agent
from alf.algorithms.bc_algorithm import BcAlgorithm

# default params
lr = 1e-4
encoding_dim = 256
fc_layers_params = (encoding_dim, ) * 2
activation = torch.relu_
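
# Offline data: path(s) to a replay-buffer checkpoint recorded by an
# earlier run (judging by the directory name, 10k steps collected by
# SAC). offline_buffer_length=None loads the whole buffer.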
offline_buffer_length = None
offline_buffer_dir = [
    "./hybrid_rl/replay_buffer_data/pendulum_replay_buffer_from_sac_10k"
]
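
# A single Pendulum environment; BC collects no online experience
# (initial_collect_steps=0 below), so the env mainly serves evaluation.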
alf.config(
    "create_environment", env_name="Pendulum-v0", num_parallel_environments=1)
alf.config(
    'Agent',
    rl_algorithm_cls=BcAlgorithm,
    optimizer=alf.optimizers.Adam(lr=lr),
)
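
# Sample mini-batches from the replay buffer instead of training on the
# whole buffer at once, and never clear the (offline) data.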
alf.config(
    'TrainerConfig',
    algorithm_ctor=Agent,
    whole_replay_buffer_training=False,
    clear_replay_buffer=False)
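
# Normal projection for the continuous action: state-dependent std
# clamped to [1e-3, 10]; scale_distribution=True squashes/scales the
# distribution to the action bounds.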
proj_net = partial(
    alf.networks.StableNormalProjectionNetwork,
    state_dependent_std=True,
    squash_mean=False,
    scale_distribution=True,
    min_std=1e-3,
    max_std=10)
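
# Actor: two 256-unit FC layers with in-place ReLU, followed by the
# Normal projection above.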
actor_network_cls = partial(
    alf.networks.ActorDistributionNetwork,
    fc_layer_params=fc_layers_params,
    activation=activation,
    continuous_projection_net_ctor=proj_net)
alf.config('BcAlgorithm', actor_network_cls=actor_network_cls)

num_iterations = 100000

# training config
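# Pure offline training: initial_collect_steps=0 means no environment
# interaction before training; evaluation runs every 1000 iterations
# over 3 episodes.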
alf.config(
    "TrainerConfig",
    initial_collect_steps=0,
    num_updates_per_train_iter=1,
    num_iterations=num_iterations,
    # disable rl training by setting rl_train_after_update_steps
    # to be larger than num_iterations
    rl_train_after_update_steps=num_iterations + 1000,
    mini_batch_size=64,
    mini_batch_length=2,
    offline_buffer_dir=offline_buffer_dir,
    offline_buffer_length=offline_buffer_length,
    num_checkpoints=1,
    debug_summaries=True,
    evaluate=True,
    eval_interval=1000,
    num_eval_episodes=3,
)