Skip to content

Commit

Permalink
Merge branch 'develop' into chore/ppo-system-cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
sash-a authored Nov 6, 2024
2 parents b3d6006 + c86604c commit a9f2050
Show file tree
Hide file tree
Showing 10 changed files with 212 additions and 34 deletions.
21 changes: 21 additions & 0 deletions mava/configs/env/vector-connector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# ---Environment Configs---
defaults:
- _self_
- scenario: con-5x5x3a # [con-5x5x3a, con-7x7x5a, con-10x10x10a, con-15x15x23a]
# Further environment config details are in the selected scenario file (e.g. "con-5x5x3a").

env_name: VectorMaConnector # Used for logging purposes.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether the environment observations encode implicit agent IDs. If True, the AgentID wrapper is not used.
# This is false since the vector observation wrapper for connector cannot encode Agent IDs by default.
implicit_agent_id: False
# Whether or not to log the winrate of this environment. This should not be changed as not all
# environments have a winrate metric.
log_win_rate: False

kwargs:
{} # time limit set in scenario
4 changes: 4 additions & 0 deletions mava/systems/mat/anakin/mat.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,10 @@ def eval_act_fn(
config.system.num_updates > config.arch.num_evaluation
), "Number of updates per evaluation must be less than total number of updates."

assert (
config.arch.num_envs % config.system.num_minibatches == 0
), "Number of envs must be divisibile by number of minibatches."

# Calculate number of updates per evaluation.
config.system.num_updates_per_eval = config.system.num_updates // config.arch.num_evaluation
steps_per_rollout = (
Expand Down
4 changes: 4 additions & 0 deletions mava/systems/ppo/anakin/ff_ippo.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,10 @@ def run_experiment(_config: DictConfig) -> float:
config.system.num_updates > config.arch.num_evaluation
), "Number of updates per evaluation must be less than total number of updates."

assert (
config.arch.num_envs % config.system.num_minibatches == 0
), "Number of envs must be divisibile by number of minibatches."

# Calculate number of updates per evaluation.
config.system.num_updates_per_eval = config.system.num_updates // config.arch.num_evaluation
steps_per_rollout = (
Expand Down
4 changes: 4 additions & 0 deletions mava/systems/ppo/anakin/ff_mappo.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,10 @@ def run_experiment(_config: DictConfig) -> float:
config.system.num_updates > config.arch.num_evaluation
), "Number of updates per evaluation must be less than total number of updates."

assert (
config.arch.num_envs % config.system.num_minibatches == 0
), "Number of envs must be divisibile by number of minibatches."

# Calculate number of updates per evaluation.
config.system.num_updates_per_eval = config.system.num_updates // config.arch.num_evaluation
steps_per_rollout = (
Expand Down
4 changes: 4 additions & 0 deletions mava/systems/ppo/anakin/rec_ippo.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,10 @@ def run_experiment(_config: DictConfig) -> float:
config.system.rollout_length % config.system.recurrent_chunk_size == 0
), "Rollout length must be divisible by recurrent chunk size."

assert (
config.arch.num_envs % config.system.num_minibatches == 0
), "Number of envs must be divisibile by number of minibatches."

# Create the environments for train and eval.
env, eval_env = environments.make(config)

Expand Down
4 changes: 4 additions & 0 deletions mava/systems/ppo/anakin/rec_mappo.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,10 @@ def run_experiment(_config: DictConfig) -> float:
config.system.rollout_length % config.system.recurrent_chunk_size == 0
), "Rollout length must be divisible by recurrent chunk size."

assert (
config.arch.num_envs % config.system.num_minibatches == 0
), "Number of envs must be divisibile by number of minibatches."

# Create the environments for train and eval.
env, eval_env = environments.make(config=config, add_global_state=True)

Expand Down
61 changes: 28 additions & 33 deletions mava/utils/make_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, Tuple, Type
from typing import Tuple

import jaxmarl
import jumanji
Expand Down Expand Up @@ -46,23 +46,24 @@
RecordEpisodeMetrics,
RwareWrapper,
SmaxWrapper,
VectorConnectorWrapper,
)
from mava.wrappers.jaxmarl import JaxMarlWrapper

# Registry mapping environment names to their generator and wrapper classes.
_jumanji_registry = {
"RobotWarehouse-v0": {"generator": RwareRandomGenerator, "wrapper": RwareWrapper},
"LevelBasedForaging-v0": {"generator": LbfRandomGenerator, "wrapper": LbfWrapper},
"MaConnector-v2": {
"RobotWarehouse": {"generator": RwareRandomGenerator, "wrapper": RwareWrapper},
"LevelBasedForaging": {"generator": LbfRandomGenerator, "wrapper": LbfWrapper},
"MaConnector": {"generator": ConnectorRandomGenerator, "wrapper": ConnectorWrapper},
"VectorMaConnector": {
"generator": ConnectorRandomGenerator,
"wrapper": ConnectorWrapper,
"wrapper": VectorConnectorWrapper,
},
"Cleaner-v0": {"generator": CleanerRandomGenerator, "wrapper": CleanerWrapper},
"Cleaner": {"generator": CleanerRandomGenerator, "wrapper": CleanerWrapper},
}

# Registry mapping environment names directly to the corresponding wrapper classes.
_matrax_registry = {"Matrax": MatraxWrapper}
_jaxmarl_registry: Dict[str, Type[JaxMarlWrapper]] = {"Smax": SmaxWrapper, "MaBrax": MabraxWrapper}
_jaxmarl_registry = {"Smax": SmaxWrapper, "MaBrax": MabraxWrapper}
_gigastep_registry = {"Gigastep": GigastepWrapper}


Expand All @@ -83,9 +84,7 @@ def add_extra_wrappers(
return train_env, eval_env


def make_jumanji_env(
env_name: str, config: DictConfig, add_global_state: bool = False
) -> Tuple[MarlEnv, MarlEnv]:
def make_jumanji_env(config: DictConfig, add_global_state: bool = False) -> Tuple[MarlEnv, MarlEnv]:
"""
Create Jumanji environments for training and evaluation.
Expand All @@ -101,24 +100,22 @@ def make_jumanji_env(
"""
# Config generator and select the wrapper.
generator = _jumanji_registry[env_name]["generator"]
generator = _jumanji_registry[config.env.env_name]["generator"]
generator = generator(**config.env.scenario.task_config)
wrapper = _jumanji_registry[env_name]["wrapper"]
wrapper = _jumanji_registry[config.env.env_name]["wrapper"]

# Create envs.
env_config = {**config.env.kwargs, **config.env.scenario.env_kwargs}
train_env = jumanji.make(env_name, generator=generator, **env_config)
eval_env = jumanji.make(env_name, generator=generator, **env_config)
train_env = jumanji.make(config.env.scenario.name, generator=generator, **env_config)
eval_env = jumanji.make(config.env.scenario.name, generator=generator, **env_config)
train_env = wrapper(train_env, add_global_state=add_global_state)
eval_env = wrapper(eval_env, add_global_state=add_global_state)

train_env, eval_env = add_extra_wrappers(train_env, eval_env, config)
return train_env, eval_env


def make_jaxmarl_env(
env_name: str, config: DictConfig, add_global_state: bool = False
) -> Tuple[MarlEnv, MarlEnv]:
def make_jaxmarl_env(config: DictConfig, add_global_state: bool = False) -> Tuple[MarlEnv, MarlEnv]:
"""
Create a JAXMARL environment.
Expand All @@ -134,16 +131,16 @@ def make_jaxmarl_env(
"""
kwargs = dict(config.env.kwargs)
if "smax" in env_name.lower():
if "smax" in config.env.env_name.lower():
kwargs["scenario"] = map_name_to_scenario(config.env.scenario.task_name)

# Create jaxmarl envs.
train_env: MarlEnv = _jaxmarl_registry[config.env.env_name](
jaxmarl.make(env_name, **kwargs),
jaxmarl.make(config.env.scenario.name, **kwargs),
add_global_state,
)
eval_env: MarlEnv = _jaxmarl_registry[config.env.env_name](
jaxmarl.make(env_name, **kwargs),
jaxmarl.make(config.env.scenario.name, **kwargs),
add_global_state,
)

Expand All @@ -152,9 +149,7 @@ def make_jaxmarl_env(
return train_env, eval_env


def make_matrax_env(
env_name: str, config: DictConfig, add_global_state: bool = False
) -> Tuple[MarlEnv, MarlEnv]:
def make_matrax_env(config: DictConfig, add_global_state: bool = False) -> Tuple[MarlEnv, MarlEnv]:
"""
Creates Matrax environments for training and evaluation.
Expand All @@ -170,7 +165,7 @@ def make_matrax_env(
"""
# Select the Matrax wrapper.
wrapper = _matrax_registry[env_name]
wrapper = _matrax_registry[config.env.scenario.name]

# Create envs.
task_name = config["env"]["scenario"]["task_name"]
Expand All @@ -184,7 +179,7 @@ def make_matrax_env(


def make_gigastep_env(
env_name: str, config: DictConfig, add_global_state: bool = False
config: DictConfig, add_global_state: bool = False
) -> Tuple[MarlEnv, MarlEnv]:
"""
Create a Gigastep environment.
Expand All @@ -200,7 +195,7 @@ def make_gigastep_env(
A tuple of the environments.
"""
wrapper = _gigastep_registry[env_name]
wrapper = _gigastep_registry[config.env.scenario.name]

kwargs = config.env.kwargs
scenario = ScenarioBuilder.from_config(config.env.scenario.task_config)
Expand All @@ -226,15 +221,15 @@ def make(config: DictConfig, add_global_state: bool = False) -> Tuple[MarlEnv, M
A tuple of the environments.
"""
env_name = config.env.scenario.name
env_name = config.env.env_name

if env_name in _jumanji_registry:
return make_jumanji_env(env_name, config, add_global_state)
elif env_name in jaxmarl.registered_envs:
return make_jaxmarl_env(env_name, config, add_global_state)
return make_jumanji_env(config, add_global_state)
elif env_name in _jaxmarl_registry:
return make_jaxmarl_env(config, add_global_state)
elif env_name in _matrax_registry:
return make_matrax_env(env_name, config, add_global_state)
return make_matrax_env(config, add_global_state)
elif env_name in _gigastep_registry:
return make_gigastep_env(env_name, config, add_global_state)
return make_gigastep_env(config, add_global_state)
else:
raise ValueError(f"{env_name} is not a supported environment.")
1 change: 1 addition & 0 deletions mava/wrappers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
ConnectorWrapper,
LbfWrapper,
RwareWrapper,
VectorConnectorWrapper,
)
from mava.wrappers.matrax import MatraxWrapper
from mava.wrappers.observation import AgentIDWrapper
Loading

0 comments on commit a9f2050

Please sign in to comment.