forked from facebookresearch/vissl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_distributed_engines.py
58 lines (47 loc) · 2.06 KB
/
run_distributed_engines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Wrapper to call torch.distributed.launch to run multi-gpu trainings.
Supports two engines: train and extract_features.
Supports SLURM as an option. Set config.SLURM.USE_SLURM=true to use slurm.
"""
import sys
from typing import List, Any
from hydra.experimental import initialize_config_module, compose
from vissl.utils.distributed_launcher import (
launch_distributed,
launch_distributed_on_slurm,
)
from vissl.utils.hydra_config import is_hydra_available, convert_to_attrdict
from vissl.utils.slurm import is_submitit_available
def hydra_main(overrides: List[Any]):
    """
    Compose the VISSL configuration from Hydra overrides and launch the engine.

    The "defaults" config is composed from the "vissl.config" config module
    using the given overrides and then split into (args, config) by
    convert_to_attrdict. When config.SLURM.USE_SLURM is truthy the run is handed
    to launch_distributed_on_slurm; otherwise launch_distributed is called with
    args.node_id and the default hook generator.

    Args:
        overrides: Hydra override strings, e.g.
            ["config=test/integration_test/quick_simclr"].

    Raises:
        AssertionError: if SLURM is requested but is_submitit_available()
            returns a falsy value.
    """
    ######################################################################################
    # DO NOT MOVE THIS IMPORT TO TOP LEVEL: submitit processes will not be initialized
    # correctly (MKL_THREADING_LAYER will be set to INTEL instead of GNU)
    ######################################################################################
    from vissl.hooks import default_hook_generator

    print(f"####### overrides: {overrides}")
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose("defaults", overrides=overrides)
    args, config = convert_to_attrdict(cfg)

    if not config.SLURM.USE_SLURM:
        # Local (non-SLURM) launch for the node identified by args.node_id.
        launch_distributed(
            cfg=config,
            node_id=args.node_id,
            engine_name=args.engine_name,
            hook_generator=default_hook_generator,
        )
        return

    # Explicit check instead of an `assert` statement: asserts are stripped when
    # Python runs with -O, which would silently skip this dependency check.
    # AssertionError is kept so the raised exception type does not change.
    if not is_submitit_available():
        raise AssertionError(
            "Please 'pip install submitit' to schedule jobs on SLURM"
        )
    launch_distributed_on_slurm(engine_name=args.engine_name, cfg=config)
if __name__ == "__main__":
    # Example usage:
    # `python tools/run_distributed_engines.py config=test/integration_test/quick_simclr`
    overrides = sys.argv[1:]
    # Explicit check instead of an `assert` statement: asserts are stripped when
    # Python runs with -O. AssertionError is kept so the exception type is unchanged.
    # NOTE(review): hydra is also imported at module top level, so a missing hydra
    # install presumably fails at import time before this check runs - confirm.
    if not is_hydra_available():
        raise AssertionError("Make sure to install hydra")
    # Always enable verbose hydra output, after any user-supplied overrides.
    overrides.append("hydra.verbose=true")
    hydra_main(overrides=overrides)