From 262f54028618197e4504db7f36123f4555dcaee2 Mon Sep 17 00:00:00 2001
From: Sergei Laktionov
Date: Sun, 2 Jul 2023 18:27:42 +0100
Subject: [PATCH] Revert "add context manager for recording, return s', change
 gym to gymnaasium in description"

This reverts commit 59c7fa6ea896537c3bf9c95d3557f4b19c396492.
---
 week04_approx_rl/homework_pytorch_debug.ipynb |  4 ++--
 week04_approx_rl/homework_pytorch_main.ipynb  | 11 +++++------
 week04_approx_rl/seminar_pytorch.ipynb        | 11 ++++-------
 3 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/week04_approx_rl/homework_pytorch_debug.ipynb b/week04_approx_rl/homework_pytorch_debug.ipynb
index 38c9251b..dc562122 100644
--- a/week04_approx_rl/homework_pytorch_debug.ipynb
+++ b/week04_approx_rl/homework_pytorch_debug.ipynb
@@ -349,8 +349,8 @@
    "source": [
     "def play_and_record(initial_state, agent, env, exp_replay, n_steps=1):\n",
     "    \"\"\"\n",
-    "    Play the game for exactly n_steps, record every (s,a,r,s', done) to replay buffer.\n",
-    "    Whenever game ends due to termination or truncation, add record with done=terminated and reset the game.\n",
+    "    Play the game for exactly n_steps, record every (s,a,r,s, done) to replay buffer.\n",
+    "    Whenever game ends, add record with done=True and reset the game.\n",
     "    It is guaranteed that env has terminated=False when passed to this function.\n",
     "\n",
     "    PLEASE DO NOT RESET ENV UNLESS IT IS \"DONE\"\n",
diff --git a/week04_approx_rl/homework_pytorch_main.ipynb b/week04_approx_rl/homework_pytorch_main.ipynb
index 0b1fa882..64e699b3 100644
--- a/week04_approx_rl/homework_pytorch_main.ipynb
+++ b/week04_approx_rl/homework_pytorch_main.ipynb
@@ -1373,12 +1373,11 @@
     "# record sessions\n",
     "from gymnasium.wrappers import RecordVideo\n",
     "\n",
-    "with make_env() as env, RecordVideo(\n",
-    "    env=make_env(), video_folder=\"./videos\", episode_trigger=lambda episode_number: True\n",
-    ") as env_monitor:\n",
-    "    sessions = [\n",
-    "        evaluate(env_monitor, agent, n_games=n_lives, greedy=True) for _ in range(10)\n",
-    "    ]\n"
+    "with RecordVideo(env=make_env(), video_folder='./videos',\n",
+    "                 episode_trigger = lambda episode_number: True) as env_monitor:\n",
+    "    sessions = [evaluate(env_monitor, agent, n_games=n_lives,\n",
+    "                         greedy=True) for _ in range(10)]\n",
+    "env.close()"
    ]
   },
   {
diff --git a/week04_approx_rl/seminar_pytorch.ipynb b/week04_approx_rl/seminar_pytorch.ipynb
index 760527f6..b32f4041 100644
--- a/week04_approx_rl/seminar_pytorch.ipynb
+++ b/week04_approx_rl/seminar_pytorch.ipynb
@@ -420,7 +420,7 @@
    "source": [
     "### Record videos\n",
     "\n",
-    "As usual, we now use `gymnasium.wrappers.RecordVideo` to record a video of our agent playing the game. Unlike our previous attempts with state binarization, this time we expect our agent to act ~~(or fail)~~ more smoothly since there's no more binarization error at play.\n",
+    "As usual, we now use `gym.wrappers.Monitor` to record a video of our agent playing the game. Unlike our previous attempts with state binarization, this time we expect our agent to act ~~(or fail)~~ more smoothly since there's no more binarization error at play.\n",
     "\n",
     "As you already did with tabular q-learning, we set epsilon=0 for final evaluation to prevent agent from exploring himself to death."
    ]
@@ -437,12 +437,9 @@
     "\n",
     "from gymnasium.wrappers import RecordVideo\n",
     "\n",
-    "with gym.make(\"CartPole-v0\", render_mode=\"rgb_array\") as record_env, RecordVideo(\n",
-    "    record_env, video_folder=\"videos\"\n",
-    ") as env_monitor:\n",
-    "    sessions = [\n",
-    "        generate_session(env_monitor, epsilon=0, train=False) for _ in range(100)\n",
-    "    ]\n"
+    "record_env = gym.make(\"CartPole-v0\", render_mode=\"rgb_array\")\n",
+    "with RecordVideo(record_env, video_folder=\"videos\") as env_monitor:\n",
+    "    sessions = [generate_session(env_monitor, epsilon=0, train=False) for _ in range(100)]"
    ]
   },
   {
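
For context (not part of the patch): every cell touched here follows the same recording pattern, wrapping an environment created with render_mode="rgb_array" in gymnasium's RecordVideo and running episodes inside a with block. The sketch below is a minimal, self-contained illustration of that pattern, assuming gymnasium and a video backend such as moviepy are installed; the random-action loop and the episode count are placeholders for the notebooks' trained agent and their evaluate / generate_session helpers.

    # Minimal sketch of the recording pattern used in these notebooks.
    # Assumes `gymnasium` and `moviepy` are installed; a random policy
    # stands in for the notebooks' trained agent.
    import gymnasium as gym
    from gymnasium.wrappers import RecordVideo

    record_env = gym.make("CartPole-v0", render_mode="rgb_array")  # rgb_array frames are needed for video
    with RecordVideo(record_env, video_folder="videos",
                     episode_trigger=lambda episode_number: True) as env_monitor:
        for _ in range(3):  # a few episodes; the seminar notebook runs 100
            obs, info = env_monitor.reset()
            done = False
            while not done:
                action = env_monitor.action_space.sample()  # placeholder for the agent's greedy action
                obs, reward, terminated, truncated, info = env_monitor.step(action)
                done = terminated or truncated
    # leaving the `with` block closes the wrapped env and finalizes the video files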