
Commit

Merge branch 'develop' into chore/ppo-system-cleanup
sash-a committed Nov 13, 2024
2 parents 16c828e + 3264886 · commit 27bdc2f
Showing 53 changed files with 2,007 additions and 120 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
@@ -2,7 +2,7 @@

# Default code owners for repo

* @arnupretorius @DriesSmit @RuanJohn @jcformanek @siddarthsingh1 @sash-a @OmaymaMahjoub @ulricharmel @callumtilbury @WiemKhlifi
* @RuanJohn @sash-a @OmaymaMahjoub @WiemKhlifi @SimonDuToit @Louay-Ben-nessir

# Add specific code owners for certain files or folders below

1 change: 1 addition & 0 deletions mava/configs/arch/anakin.yaml
@@ -1,4 +1,5 @@
# --- Anakin config ---
architecture_name: anakin

# --- Training ---
num_envs: 16 # Number of vectorised environments per device.
25 changes: 25 additions & 0 deletions mava/configs/arch/sebulba.yaml
@@ -0,0 +1,25 @@
# --- Sebulba config ---
architecture_name: sebulba

# --- Training ---
num_envs: 32 # number of environments per thread.

# --- Evaluation ---
evaluation_greedy: False # Evaluate the policy greedily. If True, the policy selects
# the action with the greatest logit. If False, the policy samples
# from the logits.
num_eval_episodes: 32 # Number of episodes to evaluate per evaluation.
num_evaluation: 100 # Number of evenly spaced evaluations to perform during training.
num_absolute_metric_eval_episodes: 320 # Number of episodes to evaluate the absolute metric (the final evaluation).
absolute_metric: True # Whether the absolute metric should be computed. For more details
# on the absolute metric please see: https://arxiv.org/abs/2209.10485

# --- Sebulba devices config ---
n_threads_per_executor: 2 # Number of different threads/env batches per actor.
actor_device_ids: [0] # IDs of actor devices.
learner_device_ids: [0] # IDs of learner devices.
rollout_queue_size: 5
# The size of the pipeline queue determines how far off-policy training is allowed to drift:
# a larger value permits more off-policy updates. Too large a value with too many actors leads
# to updates being wasted on stale rollouts; too small a value negates the benefit of having
# multiple actors. A value of 1 with a single actor keeps training almost strictly on-policy.
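
As an illustrative aside (not part of this diff, and not Mava's actual implementation), the queue behaviour described in these comments can be sketched with a plain Python bounded queue: actor threads block when the pipeline is full, so rollout_queue_size directly caps how far actors can run ahead of the learner.

import queue
import threading

ROLLOUT_QUEUE_SIZE = 5  # mirrors rollout_queue_size above
pipeline: queue.Queue = queue.Queue(maxsize=ROLLOUT_QUEUE_SIZE)

def actor(actor_id: int) -> None:
    # Stand-in for an actor thread producing trajectory batches.
    for step in range(10):
        pipeline.put(f"actor-{actor_id}-rollout-{step}")  # blocks while the queue is full

def learner() -> None:
    # Stand-in for the learner thread consuming rollouts and running updates.
    for _ in range(20):
        _rollout = pipeline.get()  # blocks until an actor produces data

threads = [threading.Thread(target=actor, args=(i,)) for i in range(2)]
threads.append(threading.Thread(target=learner))
for t in threads:
    t.start()
for t in threads:
    t.join()

With maxsize=1 and a single actor, the learner consumes each rollout almost as soon as it is produced (near on-policy); a larger maxsize lets rollouts age in the queue before they are consumed (more off-policy).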
2 changes: 1 addition & 1 deletion mava/configs/default/ff_hasac.yaml
@@ -4,7 +4,7 @@ defaults:
- arch: anakin
- system: sac/ff_hasac
- network: mlp # [mlp, cnn]
- env: mabrax # [mabrax]
- env: mabrax # [mabrax, mpe]

hydra:
searchpath:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_ippo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/ff_ippo
- network: mlp # [mlp, cnn]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
11 changes: 11 additions & 0 deletions mava/configs/default/ff_ippo_sebulba.yaml
@@ -0,0 +1,11 @@
defaults:
- logger: logger
- arch: sebulba
- system: ppo/ff_ippo
- network: mlp # [mlp, continuous_mlp, cnn]
- env: lbf_gym # [rware_gym, lbf_gym, smaclite_gym]
- _self_

hydra:
searchpath:
- file://mava/configs
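
As a hedged illustration (assuming Hydra's standard compose API and that the config groups are packaged under their group names, which is what the searchpath entry above suggests), the new default file could be composed and overridden programmatically roughly like this; the exact entry point Mava uses may differ. The overrides "env=rware_gym" and "arch.num_envs" refer to config files added in this commit.

import os

from hydra import compose, initialize_config_dir

# Assumes this is run from the repository root so the config directory resolves.
with initialize_config_dir(version_base=None, config_dir=os.path.abspath("mava/configs/default")):
    cfg = compose(
        config_name="ff_ippo_sebulba",
        overrides=["env=rware_gym", "arch.num_envs=64"],
    )
    print(cfg.arch.architecture_name)  # expected: "sebulba"

The same overrides can be passed on the command line when launching the corresponding system script.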
2 changes: 1 addition & 1 deletion mava/configs/default/ff_isac.yaml
@@ -4,7 +4,7 @@ defaults:
- arch: anakin
- system: sac/ff_isac
- network: mlp
- env: mabrax # [mabrax]
- env: mabrax # [mabrax, mpe]

hydra:
searchpath:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_mappo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/ff_mappo
- network: mlp # [mlp, cnn]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_masac.yaml
@@ -4,7 +4,7 @@ defaults:
- arch: anakin
- system: sac/ff_masac
- network: mlp
- env: mabrax # [mabrax]
- env: mabrax # [mabrax, mpe]

hydra:
searchpath:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_sable.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: sable/ff_sable
- network: ff_retention
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/mat.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: mat/mat
- network: transformer
- env: rware # [gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_ippo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/rec_ippo
- network: rnn # [rnn, rcnn]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_mappo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/rec_mappo
- network: rnn # [rnn, rcnn]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_sable.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: sable/rec_sable
- network: rec_retention
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax, mabrax, mpe]
- _self_

hydra:
25 changes: 25 additions & 0 deletions mava/configs/env/lbf_gym.yaml
@@ -0,0 +1,25 @@
# ---Environment Configs---
defaults:
- _self_

env_name: LevelBasedForaging # Used for logging purposes.
scenario:
name: lbforaging
task_name: Foraging-8x8-2p-1f-v3

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether the environment observations encode implicit agent IDs. If True, the AgentID wrapper is not used.
# This should not be changed.
implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: False

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True

kwargs:
max_episode_steps: 100
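
As a hedged sketch (assuming the lbforaging package registers the Foraging-* ids with Gymnasium and that max_episode_steps is forwarded as a time limit), the config above roughly corresponds to an environment created like this; the same pattern applies to the rware_gym and smaclite_gym configs below.

import gymnasium as gym
import lbforaging  # noqa: F401  - importing registers the Foraging-* environment ids

# task_name and max_episode_steps mirror the values in the config above.
env = gym.make("Foraging-8x8-2p-1f-v3", max_episode_steps=100)
obs, info = env.reset(seed=0)
env.close()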
19 changes: 19 additions & 0 deletions mava/configs/env/mpe.yaml
@@ -0,0 +1,19 @@
# --- Environment Configs---
defaults:
- _self_
- scenario: simple_spread_3ag # [simple_spread_3ag, simple_spread_5ag, simple_spread_10ag]

env_name: MPE # Used for logging purposes and selection of the corresponding wrapper.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: False

kwargs:
# Note: We officially support only `Continuous` actions for now, although the `Discrete` version also works.
action_type: Continuous # Whether agent action spaces are "Continuous" or "Discrete".
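
As a hypothetical sketch (assuming JaxMARL's make registry and that the scenario's task_config keys map directly onto the MPE constructor's keyword arguments), the default simple_spread_3ag scenario could be instantiated roughly as follows.

import jax
from jaxmarl import make

# Values mirror mava/configs/env/scenario/simple_spread_3ag.yaml below.
env = make("MPE_simple_spread_v3", num_agents=3, num_landmarks=3, local_ratio=0.5)
obs, state = env.reset(jax.random.PRNGKey(0))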
25 changes: 25 additions & 0 deletions mava/configs/env/rware_gym.yaml
@@ -0,0 +1,25 @@
# ---Environment Configs---
defaults:
- _self_

env_name: RobotWarehouse # Used for logging purposes.
scenario:
name: rware
task_name: rware-tiny-2ag-v2 # [rware-tiny-2ag-v2, rware-tiny-4ag-v2, rware-tiny-4ag-easy-v2, rware-small-4ag-v2]

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether the environment observations encode implicit agent IDs. If True, the AgentID wrapper is not used.
# This should not be changed.
implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: False

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True

kwargs:
max_episode_steps: 500
8 changes: 8 additions & 0 deletions mava/configs/env/scenario/simple_spread_10ag.yaml
@@ -0,0 +1,8 @@
# The config of the simple_spread_10ag scenario.
name: MPE_simple_spread_v3
task_name: simple_spread_10ag

task_config:
num_agents: 10
num_landmarks: 10
local_ratio: 0.5
8 changes: 8 additions & 0 deletions mava/configs/env/scenario/simple_spread_3ag.yaml
@@ -0,0 +1,8 @@
# The config of the simple_spread_3ag scenario.
name: MPE_simple_spread_v3
task_name: simple_spread_3ag

task_config:
num_agents: 3
num_landmarks: 3
local_ratio: 0.5
8 changes: 8 additions & 0 deletions mava/configs/env/scenario/simple_spread_5ag.yaml
@@ -0,0 +1,8 @@
# The config of the simple_spread_5ag scenario.
name: MPE_simple_spread_v3
task_name: simple_spread_5ag

task_config:
num_agents: 5
num_landmarks: 5
local_ratio: 0.5
25 changes: 25 additions & 0 deletions mava/configs/env/smaclite_gym.yaml
@@ -0,0 +1,25 @@
# ---Environment Configs---
defaults:
- _self_

env_name: SMACLite # Used for logging purposes.
scenario:
name: smaclite
task_name: smaclite/2s3z-v0 # smaclite/ + ['10m_vs_11m-v0', '27m_vs_30m-v0', '3s5z_vs_3s6z-v0', '2s3z-v0', '3s5z-v0', '2c_vs_64zg-v0', '2s_vs_1sc-v0', '3s_vs_5z-v0']

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether the environment observations encode implicit agent IDs. If True, the AgentID wrapper is not used.
# This should not be changed.
implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: True

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True

kwargs:
max_episode_steps: 500