
Replace episode_reward with episodic_return (vwxyzjn#125)
* Replace `episode_reward` with `episodic_return`

* Quick fix: deleting the `wandb` folder at root
vwxyzjn authored Feb 28, 2022
1 parent b63315b commit 0b3f8ea
Showing 14 changed files with 25 additions and 25 deletions.
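The value printed and logged under the new name comes from `info['episode']['r']`, which `gym.wrappers.RecordEpisodeStatistics` attaches to the step `info` dict when an episode finishes. A minimal sketch of that mechanism (not part of this commit), assuming the pre-0.26 gym API these scripts use and `CartPole-v1` purely for illustration:

```python
# Minimal sketch: RecordEpisodeStatistics adds an "episode" entry to `info` at
# episode end, containing the episodic return "r", length "l", and elapsed time "t".
import gym

env = gym.wrappers.RecordEpisodeStatistics(gym.make("CartPole-v1"))
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
print(f"episodic_return={info['episode']['r']}")
```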
8 changes: 4 additions & 4 deletions cleanrl/apex_dqn_atari.py
@@ -768,7 +768,7 @@ def act(args, experiment_name, i, q_network, target_network, lock, rollouts_queu
# TRY NOT TO MODIFY: start the game
obs = env.reset()
storage = []
- episode_reward = 0
+ episodic_return = 0
update_step = 0
while True:
update_step += 1
@@ -787,7 +787,7 @@ def act(args, experiment_name, i, q_network, target_network, lock, rollouts_queu

# TRY NOT TO MODIFY: execute the game and log data.
next_obs, reward, done, info = env.step(action)
- episode_reward += reward
+ episodic_return += reward
storage += [(obs, action, reward, next_obs, float(done))]
with lock:
global_step += 1
@@ -834,7 +834,7 @@ def act(args, experiment_name, i, q_network, target_network, lock, rollouts_queu
# important to note that because `EpisodicLifeEnv` wrapper is applied,
# the real episode reward is actually the sum of episode reward of 5 lives
# which we record through `info['episode']['r']` provided by gym.wrappers.RecordEpisodeStatistics
- obs, episode_reward = env.reset(), 0
+ obs, episodic_return = env.reset(), 0


def data_process(args, i, global_step, rollouts_queue, data_process_queue, data_process_back_queues, device):
@@ -1102,7 +1102,7 @@ def learn(
m = stats_queue.get()
if m[0] == "charts/episodic_return":
r, l = m[1], m[2]
- print(f"global_step={global_step}, episode_reward={r}")
+ print(f"global_step={global_step}, episodic_return={r}")
writer.add_scalar("charts/episodic_return", r, global_step)
writer.add_scalar("charts/stats_queue_size", stats_queue.qsize(), global_step)
writer.add_scalar("charts/rollouts_queue_size", rollouts_queue.qsize(), global_step)
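The comment in `apex_dqn_atari.py` above distinguishes the locally accumulated return from `info['episode']['r']` because `EpisodicLifeEnv` ends an "episode" at every life loss. Below is a hedged sketch of that interaction, with `stable_baselines3`'s `EpisodicLifeEnv` standing in for the wrapper the script applies, and with the wrapper order the comment implies (`RecordEpisodeStatistics` applied before the life wrapper, so it still sees full-game boundaries):

```python
# Sketch under the assumptions stated above; requires gym's Atari extras and
# stable-baselines3, and uses the pre-0.26 gym step/reset API.
import gym
from stable_baselines3.common.atari_wrappers import EpisodicLifeEnv

env = gym.make("BreakoutNoFrameskip-v4")
env = gym.wrappers.RecordEpisodeStatistics(env)  # records the full-game return
env = EpisodicLifeEnv(env)                       # emits done=True at every life loss

obs = env.reset()
episodic_return = 0  # per-life accumulator, like the variable renamed in this commit
while True:
    obs, reward, done, info = env.step(env.action_space.sample())
    episodic_return += reward
    if done:
        if "episode" in info:  # true game over: the statistics wrapper reports the 5-life sum
            print(f"episodic_return={info['episode']['r']}")
            break
        obs, episodic_return = env.reset(), 0  # life lost: only the local counter resets
```

This is why the scripts log `info['episode']['r']` rather than the running `episodic_return` variable.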
2 changes: 1 addition & 1 deletion cleanrl/c51.py
@@ -180,7 +180,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)
break
2 changes: 1 addition & 1 deletion cleanrl/c51_atari.py
@@ -201,7 +201,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)
break
2 changes: 1 addition & 1 deletion cleanrl/ddpg_continuous_action.py
@@ -179,7 +179,7 @@ def forward(self, x):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
break

2 changes: 1 addition & 1 deletion cleanrl/dqn.py
@@ -161,7 +161,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)
break
2 changes: 1 addition & 1 deletion cleanrl/dqn_atari.py
@@ -182,7 +182,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)
break
8 changes: 4 additions & 4 deletions cleanrl/offline/offline_dqn_atari_visual.py
@@ -561,7 +561,7 @@ def __iter__(self):
print(q_network)
# TRY NOT TO MODIFY: start the game
obs = env.reset()
- episode_reward = 0
+ episodic_return = 0
for global_step in range(args.total_timesteps):
# ALGO LOGIC: put action logic here
epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction * args.total_timesteps, global_step)
@@ -576,11 +576,11 @@ def __iter__(self):

# TRY NOT TO MODIFY: execute the game and log data.
next_obs, reward, done, info = env.step(action)
- episode_reward += reward
+ episodic_return += reward

# TRY NOT TO MODIFY: record rewards for plotting purposes
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)

@@ -590,7 +590,7 @@ def __iter__(self):
# important to note that because `EpisodicLifeEnv` wrapper is applied,
# the real episode reward is actually the sum of episode reward of 5 lives
# which we record through `info['episode']['r']` provided by gym.wrappers.RecordEpisodeStatistics
- obs, episode_reward = env.reset(), 0
+ obs, episodic_return = env.reset(), 0

if global_step % args.train_frequency == 0:
# s_obs, s_actions, s_rewards, s_next_obses, s_dones = rb.sample(args.batch_size)
8 changes: 4 additions & 4 deletions cleanrl/offline/offline_dqn_cql_atari_visual.py
@@ -564,7 +564,7 @@ def __iter__(self):
print(q_network)
# TRY NOT TO MODIFY: start the game
obs = env.reset()
- episode_reward = 0
+ episodic_return = 0

for global_step in range(args.total_timesteps):
# ALGO LOGIC: put action logic here
@@ -580,11 +580,11 @@ def __iter__(self):

# TRY NOT TO MODIFY: execute the game and log data.
next_obs, reward, done, info = env.step(action)
- episode_reward += reward
+ episodic_return += reward

# TRY NOT TO MODIFY: record rewards for plotting purposes
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)

@@ -594,7 +594,7 @@ def __iter__(self):
# important to note that because `EpisodicLifeEnv` wrapper is applied,
# the real episode reward is actually the sum of episode reward of 5 lives
# which we record through `info['episode']['r']` provided by gym.wrappers.RecordEpisodeStatistics
- obs, episode_reward = env.reset(), 0
+ obs, episodic_return = env.reset(), 0

if global_step % args.train_frequency == 0:
# s_obs, s_actions, s_rewards, s_next_obses, s_dones = rb.sample(args.batch_size)
2 changes: 1 addition & 1 deletion cleanrl/ppo_pettingzoo.py
@@ -334,7 +334,7 @@ def get_value(self, x):

for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
break

2 changes: 1 addition & 1 deletion cleanrl/rnd_ppo.py
@@ -845,7 +845,7 @@ def forward(self, next_obs):
for idx, info in enumerate(infos):
if "episode" in info.keys():
print(
- f"global_step={global_step}, episode_reward={info['episode']['r']}, curiosity_reward={curiosity_rewards[step][idx]}"
+ f"global_step={global_step}, episodic_return={info['episode']['r']}, curiosity_reward={curiosity_rewards[step][idx]}"
)
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/episode_curiosity_reward", curiosity_rewards[step][idx], global_step)
2 changes: 1 addition & 1 deletion cleanrl/sac_continuous_action.py
@@ -222,7 +222,7 @@ def to(self, device):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
break

2 changes: 1 addition & 1 deletion cleanrl/td3_continuous_action.py
@@ -184,7 +184,7 @@ def forward(self, x):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
break

4 changes: 2 additions & 2 deletions cleanrl_utils/paper_plot.py
@@ -20,7 +20,7 @@
"--wandb-project", type=str, default="cleanrl/cleanrl.benchmark", help="the name of wandb project (e.g. cleanrl/cleanrl)"
)
parser.add_argument(
- "--feature-of-interest", type=str, default="charts/episode_reward", help="which feature to be plotted on the y-axis"
+ "--feature-of-interest", type=str, default="charts/episodic_return", help="which feature to be plotted on the y-axis"
)
parser.add_argument("--hyper-params-tuned", nargs="+", default=[], help="the hyper parameters tuned")
# parser.add_argument('--scan-history', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
@@ -67,7 +67,7 @@
"td3_continuous_action": "TD3",
}

- # args.feature_of_interest = 'charts/episode_reward'
+ # args.feature_of_interest = 'charts/episodic_return'
feature_name = args.feature_of_interest.replace("/", "_")
if not os.path.exists(feature_name):
os.makedirs(feature_name)
4 changes: 2 additions & 2 deletions cleanrl_utils/plot.py
@@ -20,7 +20,7 @@
"--wandb-project", type=str, default="cleanrl/cleanrl.benchmark", help="the name of wandb project (e.g. cleanrl/cleanrl)"
)
parser.add_argument(
- "--feature-of-interest", type=str, default="charts/episode_reward", help="which feature to be plotted on the y-axis"
+ "--feature-of-interest", type=str, default="charts/episodic_return", help="which feature to be plotted on the y-axis"
)
parser.add_argument("--hyper-params-tuned", nargs="+", default=[], help="the hyper parameters tuned")
# parser.add_argument('--scan-history', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
@@ -62,7 +62,7 @@
# 'ppo_no_adj': 'Naive invalid action masking',
}

- # args.feature_of_interest = 'charts/episode_reward'
+ # args.feature_of_interest = 'charts/episodic_return'
feature_name = args.feature_of_interest.replace("/", "_")
if not os.path.exists(feature_name):
os.makedirs(feature_name)
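With the plotting utilities now defaulting to `charts/episodic_return`, runs produced before this commit that only logged `charts/episode_reward` would presumably need `--feature-of-interest` overridden. As a rough, hypothetical illustration of where the metric is read from, this sketch pulls the renamed key from the default wandb project via the public API; the actual `plot.py`/`paper_plot.py` scripts are considerably more involved:

```python
# Hypothetical sketch (not from this commit): fetch the renamed metric with the
# wandb public API. The project path is the scripts' --wandb-project default.
import wandb

api = wandb.Api()
for run in api.runs("cleanrl/cleanrl.benchmark"):
    history = run.history(keys=["global_step", "charts/episodic_return"])
    if "charts/episodic_return" not in history:
        continue  # run predates the rename or never logged the metric
    returns = history["charts/episodic_return"].dropna()
    if not returns.empty:
        print(f"{run.name}: final episodic_return={returns.iloc[-1]}")
```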
