
Replace episode_reward with episodic_return (vwxyzjn#125)
* Replace `episode_reward` with `episodic_return`

* Quick fix: deleting the `wandb` folder at root
vwxyzjn authored Feb 28, 2022
1 parent b63315b commit 0b3f8ea
Showing 14 changed files with 25 additions and 25 deletions.
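The value printed and logged under the new name comes from `info['episode']['r']`, which `gym.wrappers.RecordEpisodeStatistics` attaches to the step `info` dict when an episode finishes. A minimal sketch of that mechanism (not part of this commit), assuming the pre-0.26 gym API these scripts use and `CartPole-v1` purely for illustration:

```python
# Minimal sketch: RecordEpisodeStatistics adds an "episode" entry to `info` at
# episode end, containing the episodic return "r", length "l", and elapsed time "t".
import gym

env = gym.wrappers.RecordEpisodeStatistics(gym.make("CartPole-v1"))
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
print(f"episodic_return={info['episode']['r']}")
```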
8 changes: 4 additions & 4 deletions cleanrl/apex_dqn_atari.py
@@ -768,7 +768,7 @@ def act(args, experiment_name, i, q_network, target_network, lock, rollouts_queu
# TRY NOT TO MODIFY: start the game
obs = env.reset()
storage = []
- episode_reward = 0
+ episodic_return = 0
update_step = 0
while True:
update_step += 1
@@ -787,7 +787,7 @@ def act(args, experiment_name, i, q_network, target_network, lock, rollouts_queu

# TRY NOT TO MODIFY: execute the game and log data.
next_obs, reward, done, info = env.step(action)
- episode_reward += reward
+ episodic_return += reward
storage += [(obs, action, reward, next_obs, float(done))]
with lock:
global_step += 1
@@ -834,7 +834,7 @@ def act(args, experiment_name, i, q_network, target_network, lock, rollouts_queu
# important to note that because `EpisodicLifeEnv` wrapper is applied,
# the real episode reward is actually the sum of episode reward of 5 lives
# which we record through `info['episode']['r']` provided by gym.wrappers.RecordEpisodeStatistics
- obs, episode_reward = env.reset(), 0
+ obs, episodic_return = env.reset(), 0


def data_process(args, i, global_step, rollouts_queue, data_process_queue, data_process_back_queues, device):
@@ -1102,7 +1102,7 @@ def learn(
m = stats_queue.get()
if m[0] == "charts/episodic_return":
r, l = m[1], m[2]
- print(f"global_step={global_step}, episode_reward={r}")
+ print(f"global_step={global_step}, episodic_return={r}")
writer.add_scalar("charts/episodic_return", r, global_step)
writer.add_scalar("charts/stats_queue_size", stats_queue.qsize(), global_step)
writer.add_scalar("charts/rollouts_queue_size", rollouts_queue.qsize(), global_step)
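The comment in `apex_dqn_atari.py` above distinguishes the locally accumulated return from `info['episode']['r']` because `EpisodicLifeEnv` ends an "episode" at every life loss. Below is a hedged sketch of that interaction, with `stable_baselines3`'s `EpisodicLifeEnv` standing in for the wrapper the script applies, and with the wrapper order the comment implies (`RecordEpisodeStatistics` applied before the life wrapper, so it still sees full-game boundaries):

```python
# Sketch under the assumptions stated above; requires gym's Atari extras and
# stable-baselines3, and uses the pre-0.26 gym step/reset API.
import gym
from stable_baselines3.common.atari_wrappers import EpisodicLifeEnv

env = gym.make("BreakoutNoFrameskip-v4")
env = gym.wrappers.RecordEpisodeStatistics(env)  # records the full-game return
env = EpisodicLifeEnv(env)                       # emits done=True at every life loss

obs = env.reset()
episodic_return = 0  # per-life accumulator, like the variable renamed in this commit
while True:
    obs, reward, done, info = env.step(env.action_space.sample())
    episodic_return += reward
    if done:
        if "episode" in info:  # true game over: the statistics wrapper reports the 5-life sum
            print(f"episodic_return={info['episode']['r']}")
            break
        obs, episodic_return = env.reset(), 0  # life lost: only the local counter resets
```

This is why the scripts log `info['episode']['r']` rather than the running `episodic_return` variable.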
2 changes: 1 addition & 1 deletion cleanrl/c51.py
@@ -180,7 +180,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)
break
2 changes: 1 addition & 1 deletion cleanrl/c51_atari.py
@@ -201,7 +201,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)
break
2 changes: 1 addition & 1 deletion cleanrl/ddpg_continuous_action.py
@@ -179,7 +179,7 @@ def forward(self, x):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
break

2 changes: 1 addition & 1 deletion cleanrl/dqn.py
@@ -161,7 +161,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)
break
2 changes: 1 addition & 1 deletion cleanrl/dqn_atari.py
@@ -182,7 +182,7 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)
break
8 changes: 4 additions & 4 deletions cleanrl/offline/offline_dqn_atari_visual.py
@@ -561,7 +561,7 @@ def __iter__(self):
print(q_network)
# TRY NOT TO MODIFY: start the game
obs = env.reset()
- episode_reward = 0
+ episodic_return = 0
for global_step in range(args.total_timesteps):
# ALGO LOGIC: put action logic here
epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction * args.total_timesteps, global_step)
@@ -576,11 +576,11 @@ def __iter__(self):

# TRY NOT TO MODIFY: execute the game and log data.
next_obs, reward, done, info = env.step(action)
- episode_reward += reward
+ episodic_return += reward

# TRY NOT TO MODIFY: record rewards for plotting purposes
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)

@@ -590,7 +590,7 @@ def __iter__(self):
# important to note that because `EpisodicLifeEnv` wrapper is applied,
# the real episode reward is actually the sum of episode reward of 5 lives
# which we record through `info['episode']['r']` provided by gym.wrappers.RecordEpisodeStatistics
- obs, episode_reward = env.reset(), 0
+ obs, episodic_return = env.reset(), 0

if global_step % args.train_frequency == 0:
# s_obs, s_actions, s_rewards, s_next_obses, s_dones = rb.sample(args.batch_size)
8 changes: 4 additions & 4 deletions cleanrl/offline/offline_dqn_cql_atari_visual.py
@@ -564,7 +564,7 @@ def __iter__(self):
print(q_network)
# TRY NOT TO MODIFY: start the game
obs = env.reset()
- episode_reward = 0
+ episodic_return = 0

for global_step in range(args.total_timesteps):
# ALGO LOGIC: put action logic here
@@ -580,11 +580,11 @@ def __iter__(self):

# TRY NOT TO MODIFY: execute the game and log data.
next_obs, reward, done, info = env.step(action)
- episode_reward += reward
+ episodic_return += reward

# TRY NOT TO MODIFY: record rewards for plotting purposes
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/epsilon", epsilon, global_step)

@@ -594,7 +594,7 @@ def __iter__(self):
# important to note that because `EpisodicLifeEnv` wrapper is applied,
# the real episode reward is actually the sum of episode reward of 5 lives
# which we record through `info['episode']['r']` provided by gym.wrappers.RecordEpisodeStatistics
- obs, episode_reward = env.reset(), 0
+ obs, episodic_return = env.reset(), 0

if global_step % args.train_frequency == 0:
# s_obs, s_actions, s_rewards, s_next_obses, s_dones = rb.sample(args.batch_size)
2 changes: 1 addition & 1 deletion cleanrl/ppo_pettingzoo.py
@@ -334,7 +334,7 @@ def get_value(self, x):

for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
break

2 changes: 1 addition & 1 deletion cleanrl/rnd_ppo.py
@@ -845,7 +845,7 @@ def forward(self, next_obs):
for idx, info in enumerate(infos):
if "episode" in info.keys():
print(
- f"global_step={global_step}, episode_reward={info['episode']['r']}, curiosity_reward={curiosity_rewards[step][idx]}"
+ f"global_step={global_step}, episodic_return={info['episode']['r']}, curiosity_reward={curiosity_rewards[step][idx]}"
)
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
writer.add_scalar("charts/episode_curiosity_reward", curiosity_rewards[step][idx], global_step)
2 changes: 1 addition & 1 deletion cleanrl/sac_continuous_action.py
@@ -222,7 +222,7 @@ def to(self, device):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
break

2 changes: 1 addition & 1 deletion cleanrl/td3_continuous_action.py
@@ -184,7 +184,7 @@ def forward(self, x):
# TRY NOT TO MODIFY: record rewards for plotting purposes
for info in infos:
if "episode" in info.keys():
- print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
break

4 changes: 2 additions & 2 deletions cleanrl_utils/paper_plot.py
@@ -20,7 +20,7 @@
"--wandb-project", type=str, default="cleanrl/cleanrl.benchmark", help="the name of wandb project (e.g. cleanrl/cleanrl)"
)
parser.add_argument(
- "--feature-of-interest", type=str, default="charts/episode_reward", help="which feature to be plotted on the y-axis"
+ "--feature-of-interest", type=str, default="charts/episodic_return", help="which feature to be plotted on the y-axis"
)
parser.add_argument("--hyper-params-tuned", nargs="+", default=[], help="the hyper parameters tuned")
# parser.add_argument('--scan-history', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
@@ -67,7 +67,7 @@
"td3_continuous_action": "TD3",
}

- # args.feature_of_interest = 'charts/episode_reward'
+ # args.feature_of_interest = 'charts/episodic_return'
feature_name = args.feature_of_interest.replace("/", "_")
if not os.path.exists(feature_name):
os.makedirs(feature_name)
4 changes: 2 additions & 2 deletions cleanrl_utils/plot.py
@@ -20,7 +20,7 @@
"--wandb-project", type=str, default="cleanrl/cleanrl.benchmark", help="the name of wandb project (e.g. cleanrl/cleanrl)"
)
parser.add_argument(
- "--feature-of-interest", type=str, default="charts/episode_reward", help="which feature to be plotted on the y-axis"
+ "--feature-of-interest", type=str, default="charts/episodic_return", help="which feature to be plotted on the y-axis"
)
parser.add_argument("--hyper-params-tuned", nargs="+", default=[], help="the hyper parameters tuned")
# parser.add_argument('--scan-history', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
@@ -62,7 +62,7 @@
# 'ppo_no_adj': 'Naive invalid action masking',
}

- # args.feature_of_interest = 'charts/episode_reward'
+ # args.feature_of_interest = 'charts/episodic_return'
feature_name = args.feature_of_interest.replace("/", "_")
if not os.path.exists(feature_name):
os.makedirs(feature_name)
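With the plotting utilities now defaulting to `charts/episodic_return`, runs produced before this commit that only logged `charts/episode_reward` would presumably need `--feature-of-interest` overridden. As a rough, hypothetical illustration of where the metric is read from, this sketch pulls the renamed key from the default wandb project via the public API; the actual `plot.py`/`paper_plot.py` scripts are considerably more involved:

```python
# Hypothetical sketch (not from this commit): fetch the renamed metric with the
# wandb public API. The project path is the scripts' --wandb-project default.
import wandb

api = wandb.Api()
for run in api.runs("cleanrl/cleanrl.benchmark"):
    history = run.history(keys=["global_step", "charts/episodic_return"])
    if "charts/episodic_return" not in history:
        continue  # run predates the rename or never logged the metric
    returns = history["charts/episodic_return"].dropna()
    if not returns.empty:
        print(f"{run.name}: final episodic_return={returns.iloc[-1]}")
```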
