From b7353346133df7525f760cb2dda9759f8550cbbc Mon Sep 17 00:00:00 2001 From: Ruan de Kock Date: Tue, 22 Oct 2024 10:27:10 +0200 Subject: [PATCH] chore: extra comment on term_or_trunc vs terminal --- mava/systems/q_learning/anakin/rec_qmix.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mava/systems/q_learning/anakin/rec_qmix.py b/mava/systems/q_learning/anakin/rec_qmix.py index 145203e53..ad07e61d7 100644 --- a/mava/systems/q_learning/anakin/rec_qmix.py +++ b/mava/systems/q_learning/anakin/rec_qmix.py @@ -154,6 +154,10 @@ def replicate(x: Any) -> Any: init_hidden_state = replicate(init_hidden_state) init_acts = env.action_spec().generate_value() + + # NOTE: Term_or_trunc refers to the joint done, i.e. when all agents are done or when the + # episode has terminated. We use this exclusively in QMIX. Terminal refers to individual agent + # dones. We keep this in this file for consistency with IQL. init_transition = Transition( obs=init_obs, # (A, ...) action=init_acts, # (A,)