
Commit

Merge branch 'develop' into chore/ppo-system-cleanup
sash-a committed Nov 13, 2024
2 parents 16c828e + 3264886 · commit 27bdc2f
Showing 53 changed files with 2,007 additions and 120 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
@@ -2,7 +2,7 @@

# Default code owners for repo

* @arnupretorius @DriesSmit @RuanJohn @jcformanek @siddarthsingh1 @sash-a @OmaymaMahjoub @ulricharmel @callumtilbury @WiemKhlifi
* @RuanJohn @sash-a @OmaymaMahjoub @WiemKhlifi @SimonDuToit @Louay-Ben-nessir

# Add specific code owners for certain files or folders below

1 change: 1 addition & 0 deletions mava/configs/arch/anakin.yaml
@@ -1,4 +1,5 @@
# --- Anakin config ---
architecture_name: anakin

# --- Training ---
num_envs: 16 # Number of vectorised environments per device.
25 changes: 25 additions & 0 deletions mava/configs/arch/sebulba.yaml
@@ -0,0 +1,25 @@
# --- Sebulba config ---
architecture_name: sebulba

# --- Training ---
num_envs: 32 # number of environments per thread.

# --- Evaluation ---
evaluation_greedy: False # Evaluate the policy greedily. If True, the policy selects
# the action with the greatest logit. If False, the policy samples
# from the logits.
num_eval_episodes: 32 # Number of episodes to evaluate per evaluation.
num_evaluation: 100 # Number of evenly spaced evaluations to perform during training.
num_absolute_metric_eval_episodes: 320 # Number of episodes to evaluate the absolute metric (the final evaluation).
absolute_metric: True # Whether the absolute metric should be computed. For more details
# on the absolute metric please see: https://arxiv.org/abs/2209.10485

# --- Sebulba devices config ---
n_threads_per_executor: 2 # Number of different threads/env batches per actor.
actor_device_ids: [0] # IDs of actor devices.
learner_device_ids: [0] # IDs of learner devices.
rollout_queue_size: 5
# The size of the pipeline queue determines how far off-policy training is allowed to drift:
# a larger value permits more off-policy updates. Too large a value with too many actors leads
# to updates being wasted on stale rollouts; too small a value negates the benefit of having
# multiple actors. A value of 1 with a single actor keeps training almost strictly on-policy.
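
As an illustrative aside (not part of this diff, and not Mava's actual implementation), the queue behaviour described in these comments can be sketched with a plain Python bounded queue: actor threads block when the pipeline is full, so rollout_queue_size directly caps how far actors can run ahead of the learner.

import queue
import threading

ROLLOUT_QUEUE_SIZE = 5  # mirrors rollout_queue_size above
pipeline: queue.Queue = queue.Queue(maxsize=ROLLOUT_QUEUE_SIZE)

def actor(actor_id: int) -> None:
    # Stand-in for an actor thread producing trajectory batches.
    for step in range(10):
        pipeline.put(f"actor-{actor_id}-rollout-{step}")  # blocks while the queue is full

def learner() -> None:
    # Stand-in for the learner thread consuming rollouts and running updates.
    for _ in range(20):
        _rollout = pipeline.get()  # blocks until an actor produces data

threads = [threading.Thread(target=actor, args=(i,)) for i in range(2)]
threads.append(threading.Thread(target=learner))
for t in threads:
    t.start()
for t in threads:
    t.join()

With maxsize=1 and a single actor, the learner consumes each rollout almost as soon as it is produced (near on-policy); a larger maxsize lets rollouts age in the queue before they are consumed (more off-policy).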
2 changes: 1 addition & 1 deletion mava/configs/default/ff_hasac.yaml
@@ -4,7 +4,7 @@ defaults:
- arch: anakin
- system: sac/ff_hasac
- network: mlp # [mlp, cnn]
- env: mabrax # [mabrax]
- env: mabrax # [mabrax, mpe]

hydra:
searchpath:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_ippo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/ff_ippo
- network: mlp # [mlp, cnn]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
11 changes: 11 additions & 0 deletions mava/configs/default/ff_ippo_sebulba.yaml
@@ -0,0 +1,11 @@
defaults:
- logger: logger
- arch: sebulba
- system: ppo/ff_ippo
- network: mlp # [mlp, continuous_mlp, cnn]
- env: lbf_gym # [rware_gym, lbf_gym, smaclite_gym]
- _self_

hydra:
searchpath:
- file://mava/configs
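
As a hedged illustration (assuming Hydra's standard compose API and that the config groups are packaged under their group names, which is what the searchpath entry above suggests), the new default file could be composed and overridden programmatically roughly like this; the exact entry point Mava uses may differ. The overrides "env=rware_gym" and "arch.num_envs" refer to config files added in this commit.

import os

from hydra import compose, initialize_config_dir

# Assumes this is run from the repository root so the config directory resolves.
with initialize_config_dir(version_base=None, config_dir=os.path.abspath("mava/configs/default")):
    cfg = compose(
        config_name="ff_ippo_sebulba",
        overrides=["env=rware_gym", "arch.num_envs=64"],
    )
    print(cfg.arch.architecture_name)  # expected: "sebulba"

The same overrides can be passed on the command line when launching the corresponding system script.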
2 changes: 1 addition & 1 deletion mava/configs/default/ff_isac.yaml
@@ -4,7 +4,7 @@ defaults:
- arch: anakin
- system: sac/ff_isac
- network: mlp
- env: mabrax # [mabrax]
- env: mabrax # [mabrax, mpe]

hydra:
searchpath:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_mappo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/ff_mappo
- network: mlp # [mlp, cnn]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_masac.yaml
@@ -4,7 +4,7 @@ defaults:
- arch: anakin
- system: sac/ff_masac
- network: mlp
- env: mabrax # [mabrax]
- env: mabrax # [mabrax, mpe]

hydra:
searchpath:
2 changes: 1 addition & 1 deletion mava/configs/default/ff_sable.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: sable/ff_sable
- network: ff_retention
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/mat.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: mat/mat
- network: transformer
- env: rware # [gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_ippo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/rec_ippo
- network: rnn # [rnn, rcnn]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_mappo.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: ppo/rec_mappo
- network: rnn # [rnn, rcnn]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, mabrax, matrax, rware, smax, mpe]
- _self_

hydra:
2 changes: 1 addition & 1 deletion mava/configs/default/rec_sable.yaml
@@ -3,7 +3,7 @@ defaults:
- arch: anakin
- system: sable/rec_sable
- network: rec_retention
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax]
- env: rware # [cleaner, connector, gigastep, lbf, rware, smax, mabrax, mpe]
- _self_

hydra:
25 changes: 25 additions & 0 deletions mava/configs/env/lbf_gym.yaml
@@ -0,0 +1,25 @@
# ---Environment Configs---
defaults:
- _self_

env_name: LevelBasedForaging # Used for logging purposes.
scenario:
name: lbforaging
task_name: Foraging-8x8-2p-1f-v3

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether the environment observations encode implicit agent IDs. If True, the AgentID wrapper is not used.
# This should not be changed.
implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: False

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True

kwargs:
max_episode_steps: 100
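
As a hedged sketch (assuming the lbforaging package registers the Foraging-* ids with Gymnasium and that max_episode_steps is forwarded as a time limit), the config above roughly corresponds to an environment created like this; the same pattern applies to the rware_gym and smaclite_gym configs below.

import gymnasium as gym
import lbforaging  # noqa: F401  - importing registers the Foraging-* environment ids

# task_name and max_episode_steps mirror the values in the config above.
env = gym.make("Foraging-8x8-2p-1f-v3", max_episode_steps=100)
obs, info = env.reset(seed=0)
env.close()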
19 changes: 19 additions & 0 deletions mava/configs/env/mpe.yaml
@@ -0,0 +1,19 @@
# --- Environment Configs---
defaults:
- _self_
- scenario: simple_spread_3ag # [simple_spread_3ag, simple_spread_5ag, simple_spread_10ag]

env_name: MPE # Used for logging purposes and selection of the corresponding wrapper.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: False

kwargs:
# Note: We officially support only `Continuous` actions for now, although the `Discrete` version also works.
action_type: Continuous # Whether agent action spaces are "Continuous" or "Discrete".
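
As a hypothetical sketch (assuming JaxMARL's make registry and that the scenario's task_config keys map directly onto the MPE constructor's keyword arguments), the default simple_spread_3ag scenario could be instantiated roughly as follows.

import jax
from jaxmarl import make

# Values mirror mava/configs/env/scenario/simple_spread_3ag.yaml below.
env = make("MPE_simple_spread_v3", num_agents=3, num_landmarks=3, local_ratio=0.5)
obs, state = env.reset(jax.random.PRNGKey(0))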
25 changes: 25 additions & 0 deletions mava/configs/env/rware_gym.yaml
@@ -0,0 +1,25 @@
# ---Environment Configs---
defaults:
- _self_

env_name: RobotWarehouse # Used for logging purposes.
scenario:
name: rware
task_name: rware-tiny-2ag-v2 # [rware-tiny-2ag-v2, rware-tiny-4ag-v2, rware-tiny-4ag-easy-v2, rware-small-4ag-v2]

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether the environment observations encode implicit agent IDs. If True, the AgentID wrapper is not used.
# This should not be changed.
implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: False

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True

kwargs:
max_episode_steps: 500
8 changes: 8 additions & 0 deletions mava/configs/env/scenario/simple_spread_10ag.yaml
@@ -0,0 +1,8 @@
# The config of the simple_spread_10ag scenario.
name: MPE_simple_spread_v3
task_name: simple_spread_10ag

task_config:
num_agents: 10
num_landmarks: 10
local_ratio: 0.5
8 changes: 8 additions & 0 deletions mava/configs/env/scenario/simple_spread_3ag.yaml
@@ -0,0 +1,8 @@
# The config of the simple_spread_3ag scenario.
name: MPE_simple_spread_v3
task_name: simple_spread_3ag

task_config:
num_agents: 3
num_landmarks: 3
local_ratio: 0.5
8 changes: 8 additions & 0 deletions mava/configs/env/scenario/simple_spread_5ag.yaml
@@ -0,0 +1,8 @@
# The config of the simple_spread_5ag scenario.
name: MPE_simple_spread_v3
task_name: simple_spread_5ag

task_config:
num_agents: 5
num_landmarks: 5
local_ratio: 0.5
25 changes: 25 additions & 0 deletions mava/configs/env/smaclite_gym.yaml
@@ -0,0 +1,25 @@
# ---Environment Configs---
defaults:
- _self_

env_name: SMACLite # Used for logging purposes.
scenario:
name: smaclite
task_name: smaclite/2s3z-v0 # smaclite/ + ['10m_vs_11m-v0', '27m_vs_30m-v0', '3s5z_vs_3s6z-v0', '2s3z-v0', '3s5z-v0', '2c_vs_64zg-v0', '2s_vs_1sc-v0', '3s_vs_5z-v0']

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether the environment observations encode implicit agent IDs. If True, the AgentID wrapper is not used.
# This should not be changed.
implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: True

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True

kwargs:
max_episode_steps: 500