-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcustom_agent.py
141 lines (108 loc) · 5.36 KB
/
custom_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from typing import List, Dict, Any, Optional
from textworld import EnvInfos
class CustomAgent:
    """ Template agent for the TextWorld competition.

    The agent keeps two private flags:

    * `_initialized`: set to `True` by `_init()`, which `_start_episode()`
      calls the first time an episode starts;
    * `_episode_has_started`: `True` between `_start_episode()` and
      `_end_episode()`.

    Sections marked `[You can insert code here.]` are the extension points
    of this template.
    """

    def __init__(self) -> None:
        # One-time initialization is deferred until the first episode starts.
        self._initialized = False
        self._episode_has_started = False

    def train(self) -> None:
        """ Tell the agent it is in training mode. """
        pass  # [You can insert code here.]

    def eval(self) -> None:
        """ Tell the agent it is in evaluation mode. """
        pass  # [You can insert code here.]

    def select_additional_infos(self) -> EnvInfos:
        """
        Returns what additional information should be made available at each game step.

        Requested information will be included within the `infos` dictionary
        passed to `CustomAgent.act()`. To request specific information, create a
        :py:class:`textworld.EnvInfos <textworld.envs.wrappers.filter.EnvInfos>`
        and set the appropriate attributes to `True`. The possible choices are:

        * `description`: text description of the current room, i.e. output of the `look` command;
        * `inventory`: text listing of the player's inventory, i.e. output of the `inventory` command;
        * `max_score`: maximum reachable score of the game;
        * `objective`: objective of the game described in text;
        * `entities`: names of all entities in the game;
        * `verbs`: verbs understood by the game;
        * `command_templates`: templates for commands understood by the game;
        * `admissible_commands`: all commands relevant to the current state;

        In addition to the standard information, game specific information
        can be requested by appending corresponding strings to the `extras`
        attribute. For this competition, the possible extras are:

        * `'recipe'`: description of the cookbook;
        * `'walkthrough'`: one possible solution to the game (not guaranteed to be optimal);

        Example:
            Here is an example of how to request information and retrieve it.

            >>> from textworld import EnvInfos
            >>> request_infos = EnvInfos(description=True, inventory=True, extras=["recipe"])
            ...
            >>> env = gym.make(env_id)
            >>> ob, infos = env.reset()
            >>> print(infos["description"])
            >>> print(infos["inventory"])
            >>> print(infos["extra.recipe"])

        Notes:
            The following information *won't* be available at test time:

            * 'walkthrough'

            Requesting additional infos comes with some penalty (called handicap).
            The exact penalty values will be defined as a function of the average
            scores achieved by agents using the same handicap.

            Handicap is defined as follows
                max_score, has_won, has_lost,               # Handicap 0
                description, inventory, verbs, objective,   # Handicap 1
                command_templates,                          # Handicap 2
                entities,                                   # Handicap 3
                extras=["recipe"],                          # Handicap 4
                admissible_commands,                        # Handicap 5
        """
        # The template requests no additional information.
        return EnvInfos()

    def _init(self) -> None:
        """ Initialize the agent. """
        self._initialized = True
        # [You can insert code here.]

    def _start_episode(self, obs: List[str], infos: Dict[str, List[Any]]) -> None:
        """
        Prepare the agent for the upcoming episode.

        Arguments:
            obs: Initial feedback for each game.
            infos: Additional information for each game.
        """
        # Lazy one-time setup: only performed before the very first episode.
        if not self._initialized:
            self._init()

        self._episode_has_started = True
        # [You can insert code here.]

    def _end_episode(self, obs: List[str], scores: List[int], infos: Dict[str, List[Any]]) -> None:
        """
        Tell the agent the episode has terminated.

        Arguments:
            obs: Previous command's feedback for each game.
            scores: The score obtained so far for each game.
            infos: Additional information for each game.
        """
        self._episode_has_started = False
        # [You can insert code here.]

    def act(self, obs: List[str], scores: List[int], dones: List[bool], infos: Dict[str, List[Any]]) -> Optional[List[str]]:
        """
        Acts upon the current list of observations.

        One text command must be returned for each observation.

        Arguments:
            obs: Previous command's feedback for each game.
            scores: The score obtained so far for each game.
            dones: Whether a game is finished.
            infos: Additional information for each game.

        Returns:
            Text commands to be performed (one per observation), or `None`
            when every game is done. If the episode has ended
            (e.g. `all(dones)`), the returned value is ignored.

        Notes:
            Commands returned for games marked as `done` have no effect.
            The states for finished games are simply copied over until all
            games are done.
        """
        if all(dones):
            self._end_episode(obs, scores, infos)
            return None  # Nothing to return.

        # The first call of an episode triggers the episode setup.
        if not self._episode_has_started:
            self._start_episode(obs, infos)

        # [Insert your code here to obtain the commands.]
        return ["wait"] * len(obs)  # No-op