Merge branch 'develop' into feat/mpe_wrapper
WiemKhlifi committed Nov 7, 2024
2 parents 715a5d7 + 905710f commit 787fc4d
Showing 35 changed files with 2,978 additions and 59 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests_linters.yaml
@@ -6,7 +6,7 @@ jobs:
   tests-and-linters:
     name: "Python ${{ matrix.python-version }} on ubuntu-latest"
     runs-on: ubuntu-latest
-    timeout-minutes: 10
+    timeout-minutes: 20
 
     strategy:
       matrix:
4 changes: 2 additions & 2 deletions mava/advanced_usage/ff_ippo_store_experience.py
@@ -31,8 +31,8 @@
 from rich.pretty import pprint
 
 from mava.evaluator import get_eval_fn, make_ff_eval_act_fn
-from mava.networks.base import FeedForwardActor as Actor
-from mava.networks.base import FeedForwardValueNet as Critic
+from mava.networks import FeedForwardActor as Actor
+from mava.networks import FeedForwardValueNet as Critic
 from mava.systems.ppo.types import LearnerState, OptStates, Params, PPOTransition
 from mava.types import ActorApply, CriticApply, ExperimentOutput, MarlEnv, MavaState
 from mava.utils.checkpointing import Checkpointer
11 changes: 11 additions & 0 deletions mava/configs/default/ff_sable.yaml
@@ -0,0 +1,11 @@
defaults:
- logger: logger
- arch: anakin
- system: sable/ff_sable
- network: ff_retention
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax]
- _self_

hydra:
searchpath:
- file://mava/configs
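
This defaults file composes the logger, architecture, system, network, and environment config groups. A minimal sketch of loading it with Hydra's compose API (hypothetical usage, not part of this commit; the override values and the system.* nesting are assumptions based on how the groups above are named):

from hydra import compose, initialize

# Compose the new ff_sable defaults and override the env group and seed.
# config_path is resolved relative to the calling file; adjust for your layout.
with initialize(version_base=None, config_path="mava/configs/default"):
    cfg = compose(config_name="ff_sable", overrides=["env=smax", "system.seed=7"])
    print(cfg.system.rollout_length)  # 128, from system/sable/ff_sable.yaml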
11 changes: 11 additions & 0 deletions mava/configs/default/rec_sable.yaml
@@ -0,0 +1,11 @@
defaults:
- logger: logger
- arch: anakin
- system: sable/rec_sable
- network: rec_retention
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax]
- _self_

hydra:
searchpath:
- file://mava/configs
21 changes: 21 additions & 0 deletions mava/configs/env/vector-connector.yaml
@@ -0,0 +1,21 @@
# ---Environment Configs---
defaults:
- _self_
- scenario: con-5x5x3a # [con-5x5x3a, con-7x7x5a, con-10x10x10a, con-15x15x23a]
# Further environment config details are in the individual scenario files.

env_name: VectorMaConnector # Used for logging purposes.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether the environment observations encode implicit agent IDs. If True, the AgentID wrapper is not used.
# This is false since the vector observation wrapper for connector cannot encode Agent IDs by default.
implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: False

kwargs:
{} # time limit set in scenario
10 changes: 10 additions & 0 deletions mava/configs/network/ff_retention.yaml
@@ -0,0 +1,10 @@
# --- Retention for ff-Sable ---
net_config:
n_block: 1 # Number of blocks
embed_dim: 64 # Embedding dimension
n_head: 1 # Number of heads

memory_config:
type: "ff_sable" # Type of the network.
agents_chunk_size: ~ # Chunk size, calculated over the agent dimension; this directly sets the sequence length for chunkwise retention.
# If unspecified, the number of agents is used as the chunk size, which means full self-retention is computed over all agents.
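
As a hedged illustration of the chunking described above (a minimal sketch with a hypothetical helper name, not Mava's implementation), the agent-token sequence can be reshaped into fixed-size chunks, defaulting to a single chunk covering all agents:

from typing import Optional

import jax.numpy as jnp

def split_agent_tokens(x: jnp.ndarray, agents_chunk_size: Optional[int]) -> jnp.ndarray:
    batch, num_agents, embed = x.shape
    # Default: the whole agent dimension is one chunk, i.e. full self-retention.
    chunk = num_agents if agents_chunk_size is None else agents_chunk_size
    assert num_agents % chunk == 0, "chunk size must divide the number of agents"
    # (batch, num_agents, embed) -> (batch, n_chunks, chunk, embed)
    return x.reshape(batch, num_agents // chunk, chunk, embed)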
16 changes: 16 additions & 0 deletions mava/configs/network/rec_retention.yaml
@@ -0,0 +1,16 @@
# --- Retention for Memory Sable ---
net_config:
n_block: 1 # Number of blocks
embed_dim: 64 # Embedding dimension
n_head: 1 # Number of heads

memory_config:
type: "rec_sable" # Type of the network.
# --- Memory factor ---
decay_scaling_factor: 0.8 # Decay scaling factor for the kappa parameter: kappa = kappa * decay_scaling_factor
# --- Positional encoding ---
timestep_positional_encoding: False # Timestep positional encoding for Sable memory.
# --- Chunking ---
timestep_chunk_size: ~ # Chunk size, calculated over the timestep dimension.
# For example, a chunk size of 2 gives a sequence length of 2 * num_agents, because there are num_agents observations within each timestep.
# If unspecified, the rollout length is used as the chunk size, which means the entire rollout is computed in parallel during training.
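
A quick worked example of the sequence-length arithmetic in the comment above (illustrative numbers only):

# With 3 agents, a timestep chunk size of 2 spans 2 * 3 = 6 tokens, since each
# timestep contributes one observation per agent. Left unset with a rollout
# length of 128, a single chunk would span 128 * 3 = 384 tokens.
num_agents = 3
timestep_chunk_size = 2
tokens_per_chunk = timestep_chunk_size * num_agents
assert tokens_per_chunk == 6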
23 changes: 23 additions & 0 deletions mava/configs/system/sable/ff_sable.yaml
@@ -0,0 +1,23 @@
# --- Defaults ff-Sable ---

total_timesteps: ~ # Set the total environment steps.
# If unspecified, it's derived from num_updates; otherwise, num_updates adjusts based on this value.
num_updates: 1000 # Number of updates
seed: 42

# --- Agent observations ---
add_agent_id: True

# --- RL hyperparameters ---
actor_lr: 2.5e-4 # Learning rate for Sable network.
update_batch_size: 2 # Number of vectorised gradient updates per device.
rollout_length: 128 # Number of environment steps per vectorised environment.
ppo_epochs: 4 # Number of ppo epochs per training data batch.
num_minibatches: 2 # Number of minibatches per ppo epoch.
gamma: 0.99 # Discounting factor.
gae_lambda: 0.95 # Lambda value for GAE computation.
clip_eps: 0.2 # Clipping value for PPO updates and value function.
ent_coef: 0.01 # Entropy regularisation term for loss function.
vf_coef: 0.5 # Critic weight in the total loss.
max_grad_norm: 0.5 # Maximum norm of the gradients for a weight update.
decay_learning_rates: False # Whether learning rates should be linearly decayed during training.
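
The gamma and gae_lambda values above parameterise advantage estimation. As a hedged sketch of the standard GAE recurrence they feed into (the textbook formulation, ignoring episode terminations for brevity; not Mava's exact implementation):

import jax
import jax.numpy as jnp

def gae_advantages(rewards, values, last_value, gamma=0.99, gae_lambda=0.95):
    """Textbook GAE: delta_t = r_t + gamma * V_{t+1} - V_t, scanned backwards."""
    next_values = jnp.append(values[1:], last_value)
    deltas = rewards + gamma * next_values - values

    def step(carry, delta):
        adv = delta + gamma * gae_lambda * carry
        return adv, adv

    # reverse=True accumulates from the final step back; outputs keep time order.
    _, advantages = jax.lax.scan(step, jnp.zeros((), deltas.dtype), deltas, reverse=True)
    return advantages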
23 changes: 23 additions & 0 deletions mava/configs/system/sable/rec_sable.yaml
@@ -0,0 +1,23 @@
# --- Defaults Memory Sable ---

total_timesteps: ~ # Set the total environment steps.
# If unspecified, it's derived from num_updates; otherwise, num_updates adjusts based on this value.
num_updates: 1000 # Number of updates
seed: 42

# --- Agent observations ---
add_agent_id: True

# --- RL hyperparameters ---
actor_lr: 2.5e-4 # Learning rate for Sable network.
update_batch_size: 2 # Number of vectorised gradient updates per device.
rollout_length: 128 # Number of environment steps per vectorised environment.
ppo_epochs: 4 # Number of ppo epochs per training data batch.
num_minibatches: 2 # Number of minibatches per ppo epoch.
gamma: 0.99 # Discounting factor.
gae_lambda: 0.95 # Lambda value for GAE computation.
clip_eps: 0.2 # Clipping value for PPO updates and value function.
ent_coef: 0.01 # Entropy regularisation term for loss function.
vf_coef: 0.5 # Critic weight in the total loss.
max_grad_norm: 0.5 # Maximum norm of the gradients for a weight update.
decay_learning_rates: False # Whether learning rates should be linearly decayed during training.
1 change: 1 addition & 0 deletions mava/networks/__init__.py
@@ -22,3 +22,4 @@
     RecurrentValueNet,
     ScannedRNN,
 )
+from mava.networks.sable_network import SableNetwork
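
This re-export makes the new network importable from the package root, mirroring the import-path change in ff_ippo_store_experience.py above. A small sketch of the equivalence (assuming only that both paths resolve after this commit):

# Both import forms refer to the same class once the re-export is in place.
from mava.networks import SableNetwork
from mava.networks.sable_network import SableNetwork as SableNetworkDirect

assert SableNetwork is SableNetworkDirect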
22 changes: 11 additions & 11 deletions mava/networks/attention.py
@@ -42,36 +42,36 @@ def setup(self) -> None:
     def __call__(self, key: chex.Array, value: chex.Array, query: chex.Array) -> chex.Array:
         # Shape names:
         # B: batch size
-        # L: sequence length
+        # S: sequence length
         # E: embedding dimension
         # hs: head size
         # nh: number of heads
 
-        B, L, D = key.shape
+        B, S, D = key.shape
 
         # calculate query, key, values for all heads in batch and move
         # head forward to be the batch dim
-        # (B, L, E) -> (B, nh, L, hs)
-        k = self.key(key).reshape(B, L, self.n_head, D // self.n_head).transpose((0, 2, 1, 3))
-        q = self.query(query).reshape(B, L, self.n_head, D // self.n_head).transpose((0, 2, 1, 3))
-        v = self.value(value).reshape(B, L, self.n_head, D // self.n_head).transpose((0, 2, 1, 3))
+        # (B, S, E) -> (B, nh, S, hs)
+        k = self.key(key).reshape(B, S, self.n_head, D // self.n_head).transpose((0, 2, 1, 3))
+        q = self.query(query).reshape(B, S, self.n_head, D // self.n_head).transpose((0, 2, 1, 3))
+        v = self.value(value).reshape(B, S, self.n_head, D // self.n_head).transpose((0, 2, 1, 3))
 
-        # causal attention: (B, nh, L, hs) x (B, nh, hs, L) -> (B, nh, L, L)
+        # causal attention: (B, nh, S, hs) x (B, nh, hs, S) -> (B, nh, S, S)
         att = jnp.matmul(q, k.transpose((0, 1, 3, 2))) * (1.0 / jnp.sqrt(k.shape[-1]))
 
         # mask out attention for all agents
         if self.masked:
             att = jnp.where(
-                self.mask[:, :, :L, :L] == 0,
+                self.mask[:, :, :S, :S] == 0,
                 jnp.finfo(jnp.float32).min,
                 att,
             )
 
         att = nn.softmax(att, axis=-1)
 
-        y = jnp.matmul(att, v)  # (B, nh, L, L) x (B, nh, L, hs) -> (B, nh, L, hs)
+        y = jnp.matmul(att, v)  # (B, nh, S, S) x (B, nh, S, hs) -> (B, nh, S, hs)
         # re-assemble all head outputs side by side
         y = y.transpose((0, 2, 1, 3))
-        y = y.reshape(B, L, D)
+        y = y.reshape(B, S, D)
 
-        return self.proj(y)  # (B, L, D)
+        return self.proj(y)  # (B, S, D)
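
For readers verifying the L -> S rename, here is a hedged, self-contained restatement of the module's masked scaled-dot-product attention math (a functional sketch under the shape names annotated above, not the module itself; the learned key/query/value/proj projections are omitted, and the mask is assumed to broadcast as (1, 1, S, S)):

import jax
import jax.numpy as jnp

def masked_attention(q, k, v, mask, n_head):
    # (B, S, D) inputs -> (B, S, D) output, matching the shape names above.
    B, S, D = k.shape
    hs = D // n_head
    # (B, S, D) -> (B, nh, S, hs)
    q, k, v = (x.reshape(B, S, n_head, hs).transpose((0, 2, 1, 3)) for x in (q, k, v))
    att = jnp.matmul(q, k.transpose((0, 1, 3, 2))) / jnp.sqrt(hs)
    att = jnp.where(mask[:, :, :S, :S] == 0, jnp.finfo(jnp.float32).min, att)
    att = jax.nn.softmax(att, axis=-1)
    y = jnp.matmul(att, v)  # (B, nh, S, S) x (B, nh, S, hs) -> (B, nh, S, hs)
    return y.transpose((0, 2, 1, 3)).reshape(B, S, D)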