update build_once for RL (PaddlePaddle#4112)
test=develop
songyouwei authored and phlrain committed Dec 24, 2019
1 parent 894429c commit 9e0ab0b
Showing 4 changed files with 26 additions and 26 deletions.
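
Context for the change: all four RL examples replace the name-scope-based nn.FC layers, whose input sizes were inferred lazily on first call via build_once, with nn.Linear layers that take explicit input and output dimensions. A minimal before/after sketch of the pattern, assuming a PaddlePaddle 1.x dygraph API; the class names here are illustrative, not the files' exact code:

# Before/after sketch of the FC -> Linear migration
# (assumes PaddlePaddle 1.x dygraph; names are illustrative).
import paddle.fluid as fluid
import paddle.fluid.dygraph.nn as nn

class PolicyOld(fluid.dygraph.Layer):
    # Old style: FC takes a name scope and only an output size;
    # the input dimension is inferred at first forward via build_once.
    def __init__(self, name_scope):
        super(PolicyOld, self).__init__(name_scope)
        self.affine1 = nn.FC(self.full_name(), size=128)

class PolicyNew(fluid.dygraph.Layer):
    # New style: Linear takes explicit (input_dim, output_dim),
    # so no name scope or deferred build step is needed.
    def __init__(self):
        super(PolicyNew, self).__init__()
        self.affine1 = nn.Linear(4, 128)  # 4 presumably matches CartPole's observation size

With explicit dimensions the parameters are created eagerly in __init__, which appears to be what removes the need for the build_once step named in the commit title.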
14 changes: 7 additions & 7 deletions dygraph/reinforcement_learning/actor_critic.py
@@ -9,7 +9,7 @@
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -40,12 +40,12 @@
 
 
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.action_head = nn.FC(self.full_name(), size=2)
-        self.value_head = nn.FC(self.full_name(), size=1)
+        self.affine1 = nn.Linear(4, 128)
+        self.action_head = nn.Linear(128, 2)
+        self.value_head = nn.Linear(128, 1)
 
         self.saved_actions = []
         self.rewards = []
@@ -65,7 +65,7 @@ def forward(self, x):
 fluid.default_startup_program().random_seed = args.seed
 fluid.default_main_program().random_seed = args.seed
 np.random.seed(args.seed)
-policy = Policy("PolicyModel")
+policy = Policy()
 
 eps = np.finfo(np.float32).eps.item()
 optimizer = fluid.optimizer.AdamOptimizer(learning_rate=3e-2)
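
The hunks above show only the layer definitions. For orientation, a two-headed actor-critic policy of this shape would typically route a shared hidden layer into separate action and value heads; the following is a hypothetical reconstruction, not the file's exact forward code:

def forward(self, x):
    # Shared hidden layer feeding separate actor and critic heads
    # (hypothetical sketch; the activation choices are assumptions).
    x = fluid.layers.relu(self.affine1(x))
    action_scores = self.action_head(x)  # logits over the 2 actions
    state_value = self.value_head(x)     # scalar value estimate
    return fluid.layers.softmax(action_scores), state_value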
12 changes: 6 additions & 6 deletions dygraph/reinforcement_learning/reinforce.py
@@ -8,7 +8,7 @@
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -37,11 +37,11 @@
 
 
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.affine2 = nn.FC(self.full_name(), size=2)
+        self.affine1 = nn.Linear(4, 128)
+        self.affine2 = nn.Linear(128, 2)
         self.dropout_ratio = 0.6
 
         self.saved_log_probs = []
@@ -64,7 +64,7 @@ def forward(self, x):
 fluid.default_main_program().random_seed = args.seed
 np.random.seed(args.seed)
 
-policy = Policy("PolicyModel")
+policy = Policy()
 
 eps = np.finfo(np.float32).eps.item()
 optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-2)
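
reinforce.py keeps dropout_ratio = 0.6 alongside the new Linear layers. In the classic REINFORCE example this ratio is applied between the two affine layers; a hypothetical sketch under that assumption:

def forward(self, x):
    # Hidden layer -> dropout -> ReLU -> action logits -> softmax
    # (hypothetical reconstruction, not the file's exact code).
    x = self.affine1(x)
    x = fluid.layers.dropout(x, dropout_prob=self.dropout_ratio)
    x = fluid.layers.relu(x)
    action_scores = self.affine2(x)
    return fluid.layers.softmax(action_scores)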
14 changes: 7 additions & 7 deletions dygraph/reinforcement_learning/test_actor_critic_load.py
@@ -9,7 +9,7 @@
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -40,12 +40,12 @@
 
 
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.action_head = nn.FC(self.full_name(), size=2)
-        self.value_head = nn.FC(self.full_name(), size=1)
+        self.affine1 = nn.Linear(4, 128)
+        self.action_head = nn.Linear(128, 2)
+        self.value_head = nn.Linear(128, 1)
 
         self.saved_actions = []
         self.rewards = []
@@ -65,7 +65,7 @@ def forward(self, x):
 fluid.default_startup_program().random_seed = args.seed
 fluid.default_main_program().random_seed = args.seed
 np.random.seed(args.seed)
-policy = Policy("PolicyModel")
+policy = Policy()
 
 eps = np.finfo(np.float32).eps.item()
 optimizer = fluid.optimizer.AdamOptimizer(learning_rate=3e-2)
12 changes: 6 additions & 6 deletions dygraph/reinforcement_learning/test_reinforce_load.py
@@ -8,7 +8,7 @@
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -37,11 +37,11 @@
 
 
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.affine2 = nn.FC(self.full_name(), size=2)
+        self.affine1 = nn.Linear(4, 128)
+        self.affine2 = nn.Linear(128, 2)
         self.dropout_ratio = 0.6
 
         self.saved_log_probs = []
@@ -64,7 +64,7 @@ def forward(self, x):
 fluid.default_main_program().random_seed = args.seed
 np.random.seed(args.seed)
 
-policy = Policy("PolicyModel")
+policy = Policy()
 
 eps = np.finfo(np.float32).eps.item()
 optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-2)
