-
Notifications
You must be signed in to change notification settings - Fork 326
/
gym-async-info-reader.py
91 lines (70 loc) · 2.86 KB
/
gym-async-info-reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
A toy example of executing a Gym environment asynchronously and gathering the info properly.
"""
import argparse
import gymnasium as gym
import numpy as np
from gymnasium import spaces
# Command-line interface: one flag selecting how the vectorized env is built.
parser = argparse.ArgumentParser(
    description=(
        "Toy example: run a Gym environment asynchronously and read its info dict."
    ),
)
parser.add_argument(
    "--use_wrapper",
    action="store_true",
    help=(
        "Build a gym.vector.AsyncVectorEnv and wrap it with GymWrapper "
        "instead of letting GymEnv create the vectorized environment."
    ),
)
# Create the dummy environment
class CustomEnv(gym.Env):
    """Toy environment whose info dict carries a ``"field1"`` entry.

    The internal state is a vector with the shape of ``observation_space``
    (3 elements). ``reset`` zeroes it and every ``step`` adds the scalar action
    to all of its elements. ``info["field1"]`` is the element-wise square of
    the state. Episodes never terminate or truncate, so the reward is always 0.

    Args:
        render_mode: Stored on the instance; this environment does no rendering.
    """

    def __init__(self, render_mode=None):
        self.render_mode = render_mode
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(3,))
        self.action_space = spaces.Box(low=-np.inf, high=np.inf, shape=(1,))
        # Define the state up front so the helpers below never hit an
        # AttributeError when they are called before the first reset().
        self.state = np.zeros(self.observation_space.shape)

    def _get_info(self):
        """Return the info dict: ``field1`` is the squared state (a new array)."""
        return {"field1": self.state**2}

    def _get_obs(self):
        """Return a copy of the state cast to the observation space's dtype."""
        # astype() copies by default, so callers can never mutate self.state
        # through the returned observation.
        return self.state.astype(self.observation_space.dtype)

    def reset(self, seed=None, options=None):
        """Zero the state and return ``(observation, info)``.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            options: Accepted for API compatibility; not used.
        """
        # We need the following line to seed self.np_random
        super().reset(seed=seed)
        self.state = np.zeros(self.observation_space.shape)
        return self._get_obs(), self._get_info()

    def step(self, action):
        """Add the scalar ``action`` to every state element.

        Args:
            action: A single-element array-like (or a plain scalar).

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``terminated`` and ``truncated`` are always ``False``.
        """
        # np.asarray lets plain Python scalars and 1-element lists through,
        # in addition to the numpy arrays the action space produces.
        self.state += np.asarray(action).item()
        terminated = False
        truncated = False
        reward = 1 if terminated else 0  # Binary sparse rewards
        return self._get_obs(), reward, terminated, truncated, self._get_info()
if __name__ == "__main__":
    # Script entry point: build a 10-env vectorized CustomEnv, register an info
    # reader for "field1", declare its spec, validate the specs and print the
    # result of a reset.
    import torch
    from torchrl.data.tensor_specs import Unbounded
    from torchrl.envs import check_env_specs, GymEnv, GymWrapper

    args = parser.parse_args()

    num_envs = 10

    if args.use_wrapper:
        # Option 1: using GymWrapper
        # The class itself is the factory; no lambda indirection is needed.
        env = gym.vector.AsyncVectorEnv([CustomEnv] * num_envs)
        env = GymWrapper(env, device="cpu")
    else:
        # Option 2: using GymEnv directly, no need to call AsyncVectorEnv
        gym.register("Custom-v0", CustomEnv)
        env = GymEnv("Custom-v0", num_envs=num_envs)

    # Info entries to copy into the tensordict, paired one-to-one with the
    # spec describing each entry.
    keys = ["field1"]
    specs = [
        Unbounded(shape=(num_envs, 3), dtype=torch.float64),
    ]

    # Create an info reader: this object will read the info and write its content to the tensordict
    def reader(info, tensordict):
        for key in keys:
            tensordict.set(key, np.stack(info[key]))
        return tensordict

    # Always close the environment, even if one of the calls below raises, so
    # that the worker processes behind the vectorized env are released.
    try:
        env.set_info_dict_reader(info_dict_reader=reader)

        # Print the info readers (there should be 2: one to read the terminal states and another to read the 'field1')
        print("readers", env.info_dict_reader)

        # We need to unlock the specs to make them writable
        env.observation_spec.unlock_()
        for key, spec in zip(keys, specs):
            env.observation_spec[key] = spec
        env.observation_spec.lock_()

        # Check that we did a good job
        check_env_specs(env)

        td = env.reset()
        print("reset data", td)
        print("content of field1 (should be a 10x3 tensor)", td["field1"])
    finally:
        env.close()