Merge branch 'develop' into chore/ppo-system-cleanup

instadeepai · Nov 6, 2024 · a9f2050 · a9f2050
2 parents b3d6006 + c86604c
commit a9f2050
Show file tree

Hide file tree

Showing 10 changed files with 212 additions and 34 deletions.
diff --git a/mava/configs/env/vector-connector.yaml b/mava/configs/env/vector-connector.yaml
@@ -0,0 +1,21 @@
+# ---Environment Configs---
+defaults:
+  - _self_
+  - scenario: con-5x5x3a # [con-5x5x3a, con-7x7x5a, con-10x10x10a, con-15x15x23a]
+# Further environment config details in "con-10x10x5a" file.
+
+env_name: VectorMaConnector # Used for logging purposes.
+
+# Defines the metric that will be used to evaluate the performance of the agent.
+# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
+eval_metric: episode_return
+
+# Whether the environment observations encode implicit agent IDs. If True, the AgentID wrapper is not used.
+# This is false since the vector observation wrapper for connector cannot encode Agent IDs by default.
+implicit_agent_id: False
+# Whether or not to log the winrate of this environment. This should not be changed as not all
+# environments have a winrate metric.
+log_win_rate: False
+
+kwargs:
+  {} # time limit set in scenario
diff --git a/mava/systems/mat/anakin/mat.py b/mava/systems/mat/anakin/mat.py
@@ -455,6 +455,10 @@ def eval_act_fn(
         config.system.num_updates > config.arch.num_evaluation
     ), "Number of updates per evaluation must be less than total number of updates."
 
+    assert (
+        config.arch.num_envs % config.system.num_minibatches == 0
+    ), "Number of envs must be divisibile by number of minibatches."
+
     # Calculate number of updates per evaluation.
     config.system.num_updates_per_eval = config.system.num_updates // config.arch.num_evaluation
     steps_per_rollout = (

diff --git a/mava/systems/ppo/anakin/ff_ippo.py b/mava/systems/ppo/anakin/ff_ippo.py
@@ -454,6 +454,10 @@ def run_experiment(_config: DictConfig) -> float:
         config.system.num_updates > config.arch.num_evaluation
     ), "Number of updates per evaluation must be less than total number of updates."
 
+    assert (
+        config.arch.num_envs % config.system.num_minibatches == 0
+    ), "Number of envs must be divisibile by number of minibatches."
+
     # Calculate number of updates per evaluation.
     config.system.num_updates_per_eval = config.system.num_updates // config.arch.num_evaluation
     steps_per_rollout = (

diff --git a/mava/systems/ppo/anakin/ff_mappo.py b/mava/systems/ppo/anakin/ff_mappo.py
@@ -452,6 +452,10 @@ def run_experiment(_config: DictConfig) -> float:
         config.system.num_updates > config.arch.num_evaluation
     ), "Number of updates per evaluation must be less than total number of updates."
 
+    assert (
+        config.arch.num_envs % config.system.num_minibatches == 0
+    ), "Number of envs must be divisibile by number of minibatches."
+
     # Calculate number of updates per evaluation.
     config.system.num_updates_per_eval = config.system.num_updates // config.arch.num_evaluation
     steps_per_rollout = (

diff --git a/mava/systems/ppo/anakin/rec_ippo.py b/mava/systems/ppo/anakin/rec_ippo.py
@@ -565,6 +565,10 @@ def run_experiment(_config: DictConfig) -> float:
             config.system.rollout_length % config.system.recurrent_chunk_size == 0
         ), "Rollout length must be divisible by recurrent chunk size."
 
+        assert (
+            config.arch.num_envs % config.system.num_minibatches == 0
+        ), "Number of envs must be divisibile by number of minibatches."
+
     # Create the environments for train and eval.
     env, eval_env = environments.make(config)
 

diff --git a/mava/systems/ppo/anakin/rec_mappo.py b/mava/systems/ppo/anakin/rec_mappo.py
@@ -568,6 +568,10 @@ def run_experiment(_config: DictConfig) -> float:
             config.system.rollout_length % config.system.recurrent_chunk_size == 0
         ), "Rollout length must be divisible by recurrent chunk size."
 
+        assert (
+            config.arch.num_envs % config.system.num_minibatches == 0
+        ), "Number of envs must be divisibile by number of minibatches."
+
     # Create the environments for train and eval.
     env, eval_env = environments.make(config=config, add_global_state=True)
 

diff --git a/mava/utils/make_env.py b/mava/utils/make_env.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Tuple, Type
+from typing import Tuple
 
 import jaxmarl
 import jumanji
@@ -46,23 +46,24 @@
     RecordEpisodeMetrics,
     RwareWrapper,
     SmaxWrapper,
+    VectorConnectorWrapper,
 )
-from mava.wrappers.jaxmarl import JaxMarlWrapper
 
 # Registry mapping environment names to their generator and wrapper classes.
 _jumanji_registry = {
-    "RobotWarehouse-v0": {"generator": RwareRandomGenerator, "wrapper": RwareWrapper},
-    "LevelBasedForaging-v0": {"generator": LbfRandomGenerator, "wrapper": LbfWrapper},
-    "MaConnector-v2": {
+    "RobotWarehouse": {"generator": RwareRandomGenerator, "wrapper": RwareWrapper},
+    "LevelBasedForaging": {"generator": LbfRandomGenerator, "wrapper": LbfWrapper},
+    "MaConnector": {"generator": ConnectorRandomGenerator, "wrapper": ConnectorWrapper},
+    "VectorMaConnector": {
         "generator": ConnectorRandomGenerator,
-        "wrapper": ConnectorWrapper,
+        "wrapper": VectorConnectorWrapper,
     },
-    "Cleaner-v0": {"generator": CleanerRandomGenerator, "wrapper": CleanerWrapper},
+    "Cleaner": {"generator": CleanerRandomGenerator, "wrapper": CleanerWrapper},
 }
 
 # Registry mapping environment names directly to the corresponding wrapper classes.
 _matrax_registry = {"Matrax": MatraxWrapper}
-_jaxmarl_registry: Dict[str, Type[JaxMarlWrapper]] = {"Smax": SmaxWrapper, "MaBrax": MabraxWrapper}
+_jaxmarl_registry = {"Smax": SmaxWrapper, "MaBrax": MabraxWrapper}
 _gigastep_registry = {"Gigastep": GigastepWrapper}
 
 
@@ -83,9 +84,7 @@ def add_extra_wrappers(
     return train_env, eval_env
 
 
-def make_jumanji_env(
-    env_name: str, config: DictConfig, add_global_state: bool = False
-) -> Tuple[MarlEnv, MarlEnv]:
+def make_jumanji_env(config: DictConfig, add_global_state: bool = False) -> Tuple[MarlEnv, MarlEnv]:
     """
     Create Jumanji environments for training and evaluation.
 
@@ -101,24 +100,22 @@ def make_jumanji_env(
 
     """
     # Config generator and select the wrapper.
-    generator = _jumanji_registry[env_name]["generator"]
+    generator = _jumanji_registry[config.env.env_name]["generator"]
     generator = generator(**config.env.scenario.task_config)
-    wrapper = _jumanji_registry[env_name]["wrapper"]
+    wrapper = _jumanji_registry[config.env.env_name]["wrapper"]
 
     # Create envs.
     env_config = {**config.env.kwargs, **config.env.scenario.env_kwargs}
-    train_env = jumanji.make(env_name, generator=generator, **env_config)
-    eval_env = jumanji.make(env_name, generator=generator, **env_config)
+    train_env = jumanji.make(config.env.scenario.name, generator=generator, **env_config)
+    eval_env = jumanji.make(config.env.scenario.name, generator=generator, **env_config)
     train_env = wrapper(train_env, add_global_state=add_global_state)
     eval_env = wrapper(eval_env, add_global_state=add_global_state)
 
     train_env, eval_env = add_extra_wrappers(train_env, eval_env, config)
     return train_env, eval_env
 
 
-def make_jaxmarl_env(
-    env_name: str, config: DictConfig, add_global_state: bool = False
-) -> Tuple[MarlEnv, MarlEnv]:
+def make_jaxmarl_env(config: DictConfig, add_global_state: bool = False) -> Tuple[MarlEnv, MarlEnv]:
     """
      Create a JAXMARL environment.
 
@@ -134,16 +131,16 @@ def make_jaxmarl_env(
 
     """
     kwargs = dict(config.env.kwargs)
-    if "smax" in env_name.lower():
+    if "smax" in config.env.env_name.lower():
         kwargs["scenario"] = map_name_to_scenario(config.env.scenario.task_name)
 
     # Create jaxmarl envs.
     train_env: MarlEnv = _jaxmarl_registry[config.env.env_name](
-        jaxmarl.make(env_name, **kwargs),
+        jaxmarl.make(config.env.scenario.name, **kwargs),
         add_global_state,
     )
     eval_env: MarlEnv = _jaxmarl_registry[config.env.env_name](
-        jaxmarl.make(env_name, **kwargs),
+        jaxmarl.make(config.env.scenario.name, **kwargs),
         add_global_state,
     )
 
@@ -152,9 +149,7 @@ def make_jaxmarl_env(
     return train_env, eval_env
 
 
-def make_matrax_env(
-    env_name: str, config: DictConfig, add_global_state: bool = False
-) -> Tuple[MarlEnv, MarlEnv]:
+def make_matrax_env(config: DictConfig, add_global_state: bool = False) -> Tuple[MarlEnv, MarlEnv]:
     """
     Creates Matrax environments for training and evaluation.
 
@@ -170,7 +165,7 @@ def make_matrax_env(
 
     """
     # Select the Matrax wrapper.
-    wrapper = _matrax_registry[env_name]
+    wrapper = _matrax_registry[config.env.scenario.name]
 
     # Create envs.
     task_name = config["env"]["scenario"]["task_name"]
@@ -184,7 +179,7 @@ def make_matrax_env(
 
 
 def make_gigastep_env(
-    env_name: str, config: DictConfig, add_global_state: bool = False
+    config: DictConfig, add_global_state: bool = False
 ) -> Tuple[MarlEnv, MarlEnv]:
     """
      Create a Gigastep environment.
@@ -200,7 +195,7 @@ def make_gigastep_env(
         A tuple of the environments.
 
     """
-    wrapper = _gigastep_registry[env_name]
+    wrapper = _gigastep_registry[config.env.scenario.name]
 
     kwargs = config.env.kwargs
     scenario = ScenarioBuilder.from_config(config.env.scenario.task_config)
@@ -226,15 +221,15 @@ def make(config: DictConfig, add_global_state: bool = False) -> Tuple[MarlEnv, M
         A tuple of the environments.
 
     """
-    env_name = config.env.scenario.name
+    env_name = config.env.env_name
 
     if env_name in _jumanji_registry:
-        return make_jumanji_env(env_name, config, add_global_state)
-    elif env_name in jaxmarl.registered_envs:
-        return make_jaxmarl_env(env_name, config, add_global_state)
+        return make_jumanji_env(config, add_global_state)
+    elif env_name in _jaxmarl_registry:
+        return make_jaxmarl_env(config, add_global_state)
     elif env_name in _matrax_registry:
-        return make_matrax_env(env_name, config, add_global_state)
+        return make_matrax_env(config, add_global_state)
     elif env_name in _gigastep_registry:
-        return make_gigastep_env(env_name, config, add_global_state)
+        return make_gigastep_env(config, add_global_state)
     else:
         raise ValueError(f"{env_name} is not a supported environment.")
diff --git a/mava/wrappers/__init__.py b/mava/wrappers/__init__.py
@@ -22,6 +22,7 @@
     ConnectorWrapper,
     LbfWrapper,
     RwareWrapper,
+    VectorConnectorWrapper,
 )
 from mava.wrappers.matrax import MatraxWrapper
 from mava.wrappers.observation import AgentIDWrapper