-
Notifications
You must be signed in to change notification settings - Fork 327
/
tree.py
1174 lines (1014 loc) · 45.9 KB
/
tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations
import weakref
from collections import deque
from typing import Any, Callable, Dict, List, Literal, Tuple
import torch
from tensordict import (
merge_tensordicts,
NestedKey,
TensorClass,
TensorDict,
TensorDictBase,
unravel_key,
)
from torchrl.data.map.tdstorage import TensorDictMap
from torchrl.data.map.utils import _plot_plotly_box, _plot_plotly_tree
from torchrl.data.replay_buffers.storages import ListStorage
from torchrl.data.tensor_specs import Composite
from torchrl.envs.common import EnvBase
class Tree(TensorClass["nocast"]):
"""Representation of a single MCTS (Monte Carlo Tree Search) Tree.
This class encapsulates the data and behavior of a tree node in an MCTS algorithm.
It includes attributes for storing information about the node, such as its children,
visit count, and rollout data. Methods are provided for traversing the tree,
computing statistics, and visualizing the tree structure.
It is somewhat indistinguishable from a node or a vertex - we use the term "Tree" when talking about
a node with children, "node" or "vertex" when talking about a place in the tree where a branching occurs.
A node in the tree is defined primarily by its ``hash`` value. Usually, a ``hash`` is determined by a unique
combination of state (or observation) and action. If one observation (found in the ``node`` attribute) has more than
one action associated, each branch will be stored in the ``subtree`` attribute as a stack of ``Tree`` instances.
Attributes:
count (int): The number of visits to this node.
index (torch.Tensor): Indices of the child nodes in the data map.
hash (torch.Tensor): A hash value for this node.
It may be the case that ``hash`` is ``None`` in the specific case where the root of the tree
has more than one action associated. In that case, each subtree branch will have a different action
associated and a hash correspoding to the ``(observation, action)`` pair.
node_id (int): A unique identifier for this node.
rollout (TensorDict): Rollout data following the observation encoded in this node, in a TED format.
If there are multiple actions taken at this node, subtrees are stored in the corresponding
entry. Rollouts can be reconstructed using the :meth:`~.rollout_from_path` method.
node (TensorDict): Data defining this node (e.g., observations) before the next branching.
Entries usually matches the ``in_keys`` in ``MCTSForest.node_map``.
subtree (Tree): A stack of subtrees produced when actions are taken.
num_children (int): The number of child nodes (read-only).
is_terminal (bool): whether the tree has children nodes (read-only).
If the tree is compact, ``is_terminal == True`` means that there are more than one child node in
``self.subtree``.
Methods:
__contains__: Whether another tree can be found in the tree.
vertices: Returns a dictionary containing all vertices in the tree. Keys must be paths, ids or hashes.
num_vertices: Returns the total number of vertices in the tree, with or without duplicates.
edges: Returns a list of edges in the tree.
valid_paths: Yields all valid paths in the tree.
max_length: Returns the maximum length of any path in the tree.
rollout_from_path: Reconstructs a rollout from a given path.
plot: Visualizes the tree using a specified backend and figure type.
get_node_by_id: returns the vertex given by its id in the tree.
get_node_by_hash: returns the vertex given by its hash in the forest.
"""
count: int | torch.Tensor = None
wins: int | torch.Tensor = None
index: torch.Tensor | None = None
# The hash is None if the node has more than one action associated
hash: int | None = None
node_id: int | None = None
# rollout following the observation encoded in node, in a TorchRL (TED) format
rollout: TensorDict | None = None
# The data specifying the node (typically an observation or a set of observations)
node_data: TensorDict | None = None
# Stack of subtrees. A subtree is produced when an action is taken.
subtree: "Tree" = None
# weakrefs to the parent(s) of the node
_parent: weakref.ref | List[weakref.ref] | None = None
# Specs: contains information such as action or observation keys and spaces.
# If present, they should be structured like env specs are:
# Composite(input_spec=Composite(full_state_spec=..., full_action_spec=...),
# output_spec=Composite(full_observation_spec=..., full_reward_spec=..., full_done_spec=...))
# where every leaf component is optional.
specs: Composite | None = None
@classmethod
def make_node(
cls,
data: TensorDictBase,
*,
device: torch.device | None = None,
batch_size: torch.Size | None = None,
specs: Composite | None = None,
) -> Tree:
"""Creates a new node given some data."""
if "next" in data.keys():
rollout = data
if not rollout.ndim:
rollout = rollout.unsqueeze(0)
subtree = TensorDict.lazy_stack([cls.make_node(data["next"][..., -1])])
else:
rollout = None
subtree = None
if device is None:
device = data.device
return cls(
count=torch.zeros(()),
wins=torch.zeros(()),
node=data.exclude("action", "next"),
rollout=rollout,
subtree=subtree,
device=device,
batch_size=batch_size,
)
# Specs
@property
def full_observation_spec(self):
"""The observation spec of the tree.
This is an alias for `Tree.specs['output_spec', 'full_observation_spec']`.
"""
return self.specs["output_spec", "full_observation_spec"]
@property
def full_reward_spec(self):
"""The reward spec of the tree.
This is an alias for `Tree.specs['output_spec', 'full_reward_spec']`.
"""
return self.specs["output_spec", "full_reward_spec"]
@property
def full_done_spec(self):
"""The done spec of the tree.
This is an alias for `Tree.specs['output_spec', 'full_done_spec']`.
"""
return self.specs["output_spec", "full_done_spec"]
@property
def full_state_spec(self):
"""The state spec of the tree.
This is an alias for `Tree.specs['input_spec', 'full_state_spec']`.
"""
return self.specs["input_spec", "full_state_spec"]
@property
def full_action_spec(self):
"""The action spec of the tree.
This is an alias for `Tree.specs['input_spec', 'full_action_spec']`.
"""
return self.specs["input_spec", "full_action_spec"]
@property
def selected_actions(self) -> torch.Tensor | TensorDictBase | None:
"""Returns a tensor containing all the selected actions branching out from this node."""
if self.subtree is None:
return None
return self.subtree.rollout[..., 0]["action"]
@property
def prev_action(self) -> torch.Tensor | TensorDictBase | None:
"""The action undertaken just before this node's observation was generated.
Returns:
a tensor, tensordict or None if the node has no parent.
.. seealso:: This will be equal to :class:`~torchrl.data.Tree.branching_action` whenever the rollout data contains a single step.
.. seealso:: :class:`All actions associated with a given node (or observation) in the tree <~torchrl.data.Tree.selected_action>`.
"""
if self.rollout is None:
return None
return self.rollout[..., -1]["action"]
@property
def branching_action(self) -> torch.Tensor | TensorDictBase | None:
"""Returns the action that branched out to this particular node.
Returns:
a tensor, tensordict or None if the node has no parent.
.. seealso:: This will be equal to :class:`~torchrl.data.Tree.prev_action` whenever the rollout data contains a single step.
.. seealso:: :class:`All actions associated with a given node (or observation) in the tree <~torchrl.data.Tree.selected_action>`.
"""
if self.rollout is None:
return None
return self.rollout[..., 0]["action"]
@property
def node_observation(self) -> torch.Tensor | TensorDictBase:
"""Returns the observation associated with this particular node.
This is the observation (or bag of observations) that defines the node before a branching occurs.
If the node contains a :attr:`~.rollout` attribute, the node observation is typically identical to the
observation resulting from the last action undertaken, i.e., ``node.rollout[..., -1]["next", "observation"]``.
If more than one observation key is associated with the tree specs, a :class:`~tensordict.TensorDict` instance
is returned instead.
For a more consistent representation, see :attr:`~.node_observations`.
"""
# TODO: implement specs
return self.node_data["observation"]
@property
def node_observations(self) -> torch.Tensor | TensorDictBase:
"""Returns the observations associated with this particular node in a TensorDict format.
This is the observation (or bag of observations) that defines the node before a branching occurs.
If the node contains a :attr:`~.rollout` attribute, the node observation is typically identical to the
observation resulting from the last action undertaken, i.e., ``node.rollout[..., -1]["next", "observation"]``.
If more than one observation key is associated with the tree specs, a :class:`~tensordict.TensorDict` instance
is returned instead.
For a more consistent representation, see :attr:`~.node_observations`.
"""
# TODO: implement specs
return self.node_data.select("observation")
@property
def visits(self) -> int | torch.Tensor:
"""Returns the number of visits associated with this particular node.
This is an alias for the :attr:`~.count` attribute.
"""
return self.count
@visits.setter
def visits(self, count):
self.count = count
def __setattr__(self, name: str, value: Any) -> None:
if name == "subtree" and value is not None:
wr = weakref.ref(self._tensordict)
if value._parent is None:
value._parent = wr
elif isinstance(value._parent, list):
value._parent.append(wr)
else:
value._parent = [value._parent, wr]
return super().__setattr__(name, value)
@property
def parent(self) -> Tree | None:
"""The parent of the node.
If the node has a parent and this object is still present in the python workspace, it will be returned by this
property.
For re-branching trees, this property may return a stack of trees where every index of the stack corresponds to
a different parent.
.. note:: the ``parent`` attribute will match in content but not in identity: the tensorclass object is recustructed
using the same tensors (i.e., tensors that point to the same memory locations).
Returns:
A ``Tree`` containing the parent data or ``None`` if the parent data is out of scope or the node is the root.
"""
parent = self._parent
if parent is not None:
# Check that all parents match
queue = [parent]
def maybe_flatten_list(maybe_nested_list):
if isinstance(maybe_nested_list, list):
for p in maybe_nested_list:
if isinstance(p, list):
queue.append(p)
else:
yield p()
else:
yield maybe_nested_list()
parent_result = None
while len(queue):
local_result = None
for r in maybe_flatten_list(queue.pop()):
if local_result is None:
local_result = r
elif r is not None and r is not local_result:
if isinstance(local_result, list):
local_result.append(r)
else:
local_result = [local_result, r]
if local_result is None:
continue
# replicate logic at macro level
if parent_result is None:
parent_result = local_result
else:
if isinstance(local_result, list):
local_result = [
r for r in local_result if r not in parent_result
]
else:
local_result = [local_result]
if isinstance(parent_result, list):
parent_result.extend(local_result)
else:
parent_result = [parent_result, *local_result]
if isinstance(parent_result, list):
return TensorDict.lazy_stack(
[self._from_tensordict(r) for r in parent_result]
)
return self._from_tensordict(parent_result)
@property
def num_children(self) -> int:
"""Number of children of this node.
Equates to the number of elements in the ``self.subtree`` stack.
"""
return len(self.subtree) if self.subtree is not None else 0
@property
def is_terminal(self) -> bool | torch.Tensor:
"""Returns True if the tree has no children nodes."""
if self.rollout is not None:
return self.rollout[..., -1]["next", "done"].squeeze(-1)
# If there is no rollout, there is no preceding data - either this is a root or it's a floating node.
# In either case, we assume that the node is not terminal.
return False
def fully_expanded(self, env: EnvBase) -> bool:
"""Returns True if the number of children is equal to the environment cardinality."""
cardinality = env.cardinality(self.node_data)
num_actions = self.num_children
return cardinality == num_actions
def get_vertex_by_id(self, id: int) -> Tree:
"""Goes through the tree and returns the node corresponding the given id."""
q = deque()
q.append(self)
while len(q):
tree = q.popleft()
if tree.node_id == id:
return tree
if tree.subtree is not None:
q.extend(tree.subtree.unbind(0))
raise ValueError(f"Node with id {id} not found.")
def get_vertex_by_hash(self, hash: int) -> Tree:
"""Goes through the tree and returns the node corresponding the given hash."""
q = deque()
q.append(self)
while len(q):
tree = q.popleft()
if tree.hash == hash:
return tree
if tree.subtree is not None:
q.extend(tree.subtree.unbind(0))
raise ValueError(f"Node with hash {hash} not found.")
def __contains__(self, other: Tree) -> bool:
hash = other.hash
for vertex in self.vertices().values():
if vertex.hash == hash:
return True
else:
return False
def vertices(
self, *, key_type: Literal["id", "hash", "path"] = "hash"
) -> Dict[int | Tuple[int], Tree]:
"""Returns a map containing the vertices of the Tree.
Keyword args:
key_type (Literal["id", "hash", "path"], optional): Specifies the type of key to use for the vertices.
- "id": Use the vertex ID as the key.
- "hash": Use a hash of the vertex as the key.
- "path": Use the path to the vertex as the key. This may lead to a dictionary with a longer length than
when ``"id"`` or ``"hash"`` are used as the same node may be part of multiple trajectories.
Defaults to ``"hash"``.
Defaults to an empty string, which may imply a default behavior.
Returns:
Dict[int | Tuple[int], Tree]: A dictionary mapping keys to Tree vertices.
"""
memo = set()
result = {}
q = deque()
cur_path = ()
q.append((self, cur_path))
use_hash = key_type == "hash"
use_id = key_type == "id"
use_path = key_type == "path"
while len(q):
tree, cur_path = q.popleft()
h = tree.hash
if h in memo and not use_path:
continue
memo.add(h)
if use_path:
result[cur_path] = tree
elif use_id:
result[tree.node_id] = tree
elif use_hash:
result[tree.node_id] = tree
else:
raise ValueError(
f"key_type must be either 'hash', 'id' or 'path'. Got {key_type}."
)
n = int(tree.num_children)
for i in range(n):
cur_path_tree = cur_path + (i,)
q.append((tree.subtree[i], cur_path_tree))
return result
def num_vertices(self, *, count_repeat: bool = False) -> int:
"""Returns the number of unique vertices in the Tree.
Keyword Args:
count_repeat (bool, optional): Determines whether to count repeated vertices.
- If ``False``, counts each unique vertex only once.
- If ``True``, counts vertices multiple times if they appear in different paths.
Defaults to ``False``.
Returns:
int: The number of unique vertices in the Tree.
"""
return len(
{
v.node_id
for v in self.vertices(
key_type="hash" if not count_repeat else "path"
).values()
}
)
def edges(self) -> List[Tuple[int, int]]:
"""Retrieves a list of edges in the tree.
Each edge is represented as a tuple of two node IDs: the parent node ID and the child node ID.
The tree is traversed using Breadth-First Search (BFS) to ensure all edges are visited.
Returns:
A list of tuples, where each tuple contains a parent node ID and a child node ID.
"""
result = []
q = deque()
parent = self.node_id
q.append((self, parent))
while len(q):
tree, parent = q.popleft()
n = int(tree.num_children)
for i in range(n):
node = tree.subtree[i]
node_id = node.node_id
result.append((parent, node_id))
q.append((node, node_id))
return result
def valid_paths(self):
"""Generates all valid paths in the tree.
A valid path is a sequence of child indices that starts at the root node and ends at a leaf node.
Each path is represented as a tuple of integers, where each integer corresponds to the index of a child node.
Yields:
tuple: A valid path in the tree.
"""
# Initialize a queue with the current tree node and an empty path
q = deque()
cur_path = ()
q.append((self, cur_path))
# Perform BFS traversal of the tree
while len(q):
# Dequeue the next tree node and its current path
tree, cur_path = q.popleft()
# Get the number of child nodes
n = int(tree.num_children)
# If this is a leaf node, yield the current path
if not n:
yield cur_path
# Iterate over the child nodes
for i in range(n):
cur_path_tree = cur_path + (i,)
q.append((tree.subtree[i], cur_path_tree))
def max_length(self):
"""Returns the maximum length of all valid paths in the tree.
The length of a path is defined as the number of nodes in the path.
If the tree is empty, returns 0.
Returns:
int: The maximum length of all valid paths in the tree.
"""
lengths = tuple(len(path) for path in self.valid_paths())
if len(lengths) == 0:
return 0
elif len(lengths) == 1:
return lengths[0]
return max(*lengths)
def rollout_from_path(self, path: Tuple[int]) -> TensorDictBase | None:
"""Retrieves the rollout data along a given path in the tree.
The rollout data is concatenated along the last dimension (dim=-1) for each node in the path.
If no rollout data is found along the path, returns ``None``.
Args:
path: A tuple of integers representing the path in the tree.
Returns:
The concatenated rollout data along the path, or None if no data is found.
"""
r = self.rollout
tree = self
rollouts = []
if r is not None:
rollouts.append(r)
for i in path:
tree = tree.subtree[i]
r = tree.rollout
if r is not None:
rollouts.append(r)
if rollouts:
return torch.cat(rollouts, dim=-1)
@staticmethod
def _label(info: List[str], tree: "Tree", root=False):
labels = []
for key in info:
if key == "hash":
hash = tree.hash
if hash is not None:
hash = hash.item()
v = f"hash={hash}"
elif root:
v = f"{key}=None"
else:
v = f"{key}={tree.rollout[key].mean().item()}"
labels.append(v)
return ", ".join(labels)
def plot(
self: Tree,
backend: str = "plotly",
figure: str = "tree",
info: List[str] = None,
make_labels: Callable[[Any, ...], Any] | None = None,
):
"""Plots a visualization of the tree using the specified backend and figure type.
Args:
backend: The plotting backend to use. Currently only supports 'plotly'.
figure: The type of figure to plot. Can be either 'tree' or 'box'.
info: A list of additional information to include in the plot (not currently used).
make_labels: An optional function to generate custom labels for the plot.
Raises:
NotImplementedError: If an unsupported backend or figure type is specified.
"""
if backend == "plotly":
if figure == "box":
_plot_plotly_box(self)
return
elif figure == "tree":
_plot_plotly_tree(self, make_labels=make_labels)
return
else:
pass
raise NotImplementedError(
f"Unknown plotting backend {backend} with figure {figure}."
)
class MCTSForest:
"""A collection of MCTS trees.
.. warning:: This class is currently under active development. Expect frequent API changes.
The class is aimed at storing rollouts in a storage, and produce trees based on a given root
in that dataset.
Keyword Args:
data_map (TensorDictMap, optional): the storage to use to store the data
(observation, reward, states etc). If not provided, it is lazily
initialized using :meth:`~torchrl.data.map.tdstorage.TensorDictMap.from_tensordict_pair`
using the list of :attr:`observation_keys` and :attr:`action_keys` as ``in_keys``.
node_map (TensorDictMap, optional): a map from the observation space to the index space.
Internally, the node map is used to gather all possible branches coming out of
a given node. For example, if an observation has two associated actions and outcomes
in the data map, then the :attr:`node_map` will return a data structure containing the
two indices in the :attr:`data_map` that correspond to these two outcomes.
If not provided, it is lazily initialized using
:meth:`~torchrl.data.map.tdstorage.TensorDictMap.from_tensordict_pair` using the list of
:attr:`observation_keys` as ``in_keys`` and the :class:`~torchrl.data.QueryModule` as
``out_keys``.
max_size (int, optional): the size of the maps.
If not provided, defaults to ``data_map.max_size`` if this can be found, then
``node_map.max_size``. If none of these are provided, defaults to `1000`.
done_keys (list of NestedKey, optional): the done keys of the environment. If not provided,
defaults to ``("done", "terminated", "truncated")``.
The :meth:`~.get_keys_from_env` can be used to automatically determine the keys.
action_keys (list of NestedKey, optional): the action keys of the environment. If not provided,
defaults to ``("action",)``.
The :meth:`~.get_keys_from_env` can be used to automatically determine the keys.
reward_keys (list of NestedKey, optional): the reward keys of the environment. If not provided,
defaults to ``("reward",)``.
The :meth:`~.get_keys_from_env` can be used to automatically determine the keys.
observation_keys (list of NestedKey, optional): the observation keys of the environment. If not provided,
defaults to ``("observation",)``.
The :meth:`~.get_keys_from_env` can be used to automatically determine the keys.
excluded_keys (list of NestedKey, optional): a list of keys to exclude from the data storage.
consolidated (bool, optional): if ``True``, the data_map storage will be consolidated on disk.
Defaults to ``False``.
Examples:
>>> from torchrl.envs import GymEnv
>>> import torch
>>> from tensordict import TensorDict, LazyStackedTensorDict
>>> from torchrl.data import TensorDictMap, ListStorage
>>> from torchrl.data.map.tree import MCTSForest
>>>
>>> from torchrl.envs import PendulumEnv, CatTensors, UnsqueezeTransform, StepCounter
>>> # Create the MCTS Forest
>>> forest = MCTSForest()
>>> # Create an environment. We're using a stateless env to be able to query it at any given state (like an oracle)
>>> env = PendulumEnv()
>>> obs_keys = list(env.observation_spec.keys(True, True))
>>> state_keys = set(env.full_state_spec.keys(True, True)) - set(obs_keys)
>>> # Appending transforms to get an "observation" key that concatenates the observations together
>>> env = env.append_transform(
... UnsqueezeTransform(
... in_keys=obs_keys,
... out_keys=[("unsqueeze", key) for key in obs_keys],
... dim=-1
... )
... )
>>> env = env.append_transform(
... CatTensors([("unsqueeze", key) for key in obs_keys], "observation")
... )
>>> env = env.append_transform(StepCounter())
>>> env.set_seed(0)
>>> # Get a reset state, then make a rollout out of it
>>> reset_state = env.reset()
>>> rollout0 = env.rollout(6, auto_reset=False, tensordict=reset_state.clone())
>>> # Append the rollout to the forest. We're removing the state entries for clarity
>>> rollout0 = rollout0.copy()
>>> rollout0.exclude(*state_keys, inplace=True).get("next").exclude(*state_keys, inplace=True)
>>> forest.extend(rollout0)
>>> # The forest should have 6 elements (the length of the rollout)
>>> assert len(forest) == 6
>>> # Let's make another rollout from the same reset state
>>> rollout1 = env.rollout(6, auto_reset=False, tensordict=reset_state.clone())
>>> rollout1.exclude(*state_keys, inplace=True).get("next").exclude(*state_keys, inplace=True)
>>> forest.extend(rollout1)
>>> assert len(forest) == 12
>>> # Let's make another final rollout from an intermediate step in the second rollout
>>> rollout1b = env.rollout(6, auto_reset=False, tensordict=rollout1[3].exclude("next"))
>>> rollout1b.exclude(*state_keys, inplace=True)
>>> rollout1b.get("next").exclude(*state_keys, inplace=True)
>>> forest.extend(rollout1b)
>>> assert len(forest) == 18
>>> # Since we have 2 rollouts starting at the same state, our tree should have two
>>> # branches if we produce it from the reset entry. Take the state, and call `get_tree`:
>>> r = rollout0[0]
>>> # Let's get the compact tree that follows the initial reset. A compact tree is
>>> # a tree where nodes that have a single child are collapsed.
>>> tree = forest.get_tree(r)
>>> print(tree.max_length())
2
>>> print(list(tree.valid_paths()))
[(0,), (1, 0), (1, 1)]
>>> from tensordict import assert_close
>>> # We can manually rebuild the tree
>>> assert_close(
... rollout1,
... torch.cat([tree.subtree[1].rollout, tree.subtree[1].subtree[0].rollout]),
... intersection=True,
... )
True
>>> # Or we can rebuild it using the dedicated method
>>> assert_close(
... rollout1,
... tree.rollout_from_path((1, 0)),
... intersection=True,
... )
True
>>> tree.plot()
>>> tree = forest.get_tree(r, compact=False)
>>> print(tree.max_length())
9
>>> print(list(tree.valid_paths()))
[(0, 0, 0, 0, 0, 0), (1, 0, 0, 0, 0, 0), (1, 0, 0, 1, 0, 0, 0, 0, 0)]
>>> assert_close(
... rollout1,
... tree.rollout_from_path((1, 0, 0, 0, 0, 0)),
... intersection=True,
... )
True
"""
def __init__(
self,
*,
data_map: TensorDictMap | None = None,
node_map: TensorDictMap | None = None,
max_size: int | None = None,
done_keys: List[NestedKey] | None = None,
reward_keys: List[NestedKey] = None,
observation_keys: List[NestedKey] = None,
action_keys: List[NestedKey] = None,
excluded_keys: List[NestedKey] = None,
consolidated: bool | None = None,
):
self.data_map = data_map
self.node_map = node_map
if max_size is None:
if data_map is not None:
max_size = data_map.max_size
if max_size != getattr(node_map, "max_size", max_size):
raise ValueError(
f"Conflicting max_size: got data_map.max_size={data_map.max_size} and node_map.max_size={node_map.max_size}."
)
elif node_map is not None:
max_size = node_map.max_size
else:
max_size = None
elif data_map is not None and max_size != getattr(
data_map, "max_size", max_size
):
raise ValueError(
f"Conflicting max_size: got data_map.max_size={data_map.max_size} and max_size={max_size}."
)
elif node_map is not None and max_size != getattr(
node_map, "max_size", max_size
):
raise ValueError(
f"Conflicting max_size: got node_map.max_size={node_map.max_size} and max_size={max_size}."
)
self.max_size = max_size
self.done_keys = done_keys
self.action_keys = action_keys
self.reward_keys = reward_keys
self.observation_keys = observation_keys
self.excluded_keys = excluded_keys
self.consolidated = consolidated
@property
def done_keys(self) -> List[NestedKey]:
"""Done Keys.
Returns the keys used to indicate that an episode has ended.
The default done keys are "done", "terminated", and "truncated". These keys can be
used in the environment's output to signal the end of an episode.
Returns:
A list of strings representing the done keys.
"""
done_keys = getattr(self, "_done_keys", None)
if done_keys is None:
self._done_keys = done_keys = ["done", "terminated", "truncated"]
return done_keys
@done_keys.setter
def done_keys(self, value):
self._done_keys = _make_list_of_nestedkeys(value, "done_keys")
@property
def reward_keys(self) -> List[NestedKey]:
"""Reward Keys.
Returns the keys used to retrieve rewards from the environment's output.
The default reward key is "reward".
Returns:
A list of strings or tuples representing the reward keys.
"""
reward_keys = getattr(self, "_reward_keys", None)
if reward_keys is None:
self._reward_keys = reward_keys = ["reward"]
return reward_keys
@reward_keys.setter
def reward_keys(self, value):
self._reward_keys = _make_list_of_nestedkeys(value, "reward_keys")
@property
def action_keys(self) -> List[NestedKey]:
"""Action Keys.
Returns the keys used to retrieve actions from the environment's input.
The default action key is "action".
Returns:
A list of strings or tuples representing the action keys.
"""
action_keys = getattr(self, "_action_keys", None)
if action_keys is None:
self._action_keys = action_keys = ["action"]
return action_keys
@action_keys.setter
def action_keys(self, value):
self._action_keys = _make_list_of_nestedkeys(value, "action_keys")
@property
def observation_keys(self) -> List[NestedKey]:
"""Observation Keys.
Returns the keys used to retrieve observations from the environment's output.
The default observation key is "observation".
Returns:
A list of strings or tuples representing the observation keys.
"""
observation_keys = getattr(self, "_observation_keys", None)
if observation_keys is None:
self._observation_keys = observation_keys = ["observation"]
return observation_keys
@observation_keys.setter
def observation_keys(self, value):
self._observation_keys = _make_list_of_nestedkeys(value, "observation_keys")
@property
def excluded_keys(self) -> List[NestedKey] | None:
return self._excluded_keys
@excluded_keys.setter
def excluded_keys(self, value):
self._excluded_keys = _make_list_of_nestedkeys(value, "excluded_keys")
def get_keys_from_env(self, env: EnvBase):
"""Writes missing done, action and reward keys to the Forest given an environment.
Existing keys are not overwritten.
"""
if getattr(self, "_reward_keys", None) is None:
self.reward_keys = env.reward_keys
if getattr(self, "_done_keys", None) is None:
self.done_keys = env.done_keys
if getattr(self, "_action_keys", None) is None:
self.action_keys = env.action_keys
if getattr(self, "_observation_keys", None) is None:
self.observation_keys = env.observation_keys
@classmethod
def _write_fn_stack(cls, new, old=None):
# This function updates the old values by adding the new ones
# if and only if the new ones are not there.
# If the old value is not provided, we assume there are none and the
# `new` is just prepared.
# This involves unsqueezing the last dim (since we'll be stacking tensors
# and calling unique).
# The update involves calling cat along the last dim + unique
# which will keep only the new values that were unknown to
# the storage.
# We use this method to track all the indices that are associated with
# an observation. Every time a new index is obtained, it is stacked alongside
# the others.
if old is None:
# we unsqueeze the values to stack them along dim -1
result = new.apply(lambda x: x.unsqueeze(-1), filter_empty=False)
result.set(
"count", torch.ones(result.shape, dtype=torch.int, device=result.device)
)
else:
def cat(name, x, y):
if name == "count":
return x
if y.ndim < x.ndim:
y = y.unsqueeze(-1)
result = torch.cat([x, y], -1)
# Breaks on mps
if result.device.type == "mps":
result = result.cpu()
result = result.unique(dim=-1, sorted=False)
result = result.to("mps")
else:
result = result.unique(dim=-1, sorted=False)
return result
result = old.named_apply(cat, new, default=None)
result.set_("count", old.get("count") + 1)
return result
def _make_data_map(self, source, dest):
try:
kwargs = {}
if self.max_size is not None:
kwargs["max_size"] = self.max_size
self.data_map = TensorDictMap.from_tensordict_pair(
source,
dest,
in_keys=[*self.observation_keys, *self.action_keys],
consolidated=self.consolidated,
**kwargs,
)
if self.max_size is None:
self.max_size = self.data_map.max_size
except KeyError as err:
raise KeyError(
"A KeyError occurred during data map creation. This could be due to the wrong setting of a key in the MCTSForest constructor. Scroll up for more info."
) from err
def _make_node_map(self, source, dest):
kwargs = {}
if self.max_size is not None:
kwargs["max_size"] = self.max_size
self.node_map = TensorDictMap.from_tensordict_pair(
source,
dest,
in_keys=[*self.observation_keys],
out_keys=[
*self.data_map.query_module.out_keys, # hash and index
# *self.action_keys,
# *[("next", rk) for rk in self.reward_keys],
"count",
],
storage_constructor=ListStorage,
collate_fn=TensorDict.lazy_stack,
write_fn=self._write_fn_stack,
**kwargs,
)
if self.max_size is None:
self.max_size = self.data_map.max_size
def extend(self, rollout, *, return_node: bool = False):
source, dest = (
rollout.exclude("next").copy(),
rollout.select("next", *self.action_keys).copy(),
)
if self.excluded_keys is not None:
dest = dest.exclude(*self.excluded_keys, inplace=True)
dest.get("next").exclude(*self.excluded_keys, inplace=True)
if self.data_map is None:
self._make_data_map(source, dest)
# We need to set the action somewhere to keep track of what action lead to what child
# # Set the action in the 'next'
# dest[1:] = source[:-1].exclude(*self.done_keys)
# Add ('observation', 'action') -> ('next, observation')
self.data_map[source] = dest
value = source
if self.node_map is None:
self._make_node_map(source, dest)
# map ('observation',) -> ('indices',)
self.node_map[source] = TensorDict.lazy_stack(value.unbind(0))
if return_node:
return self.get_tree(rollout)
def add(self, step, *, return_node: bool = False):
source, dest = (
step.exclude("next").copy(),
step.select("next", *self.action_keys).copy(),
)
if self.data_map is None:
self._make_data_map(source, dest)