
Distrib #635

Merged (29 commits) on Oct 24, 2019
Changes from 1 commit
Commits (29)
2afc205  [WIP] Added cifar10 distributed example (vfdev-5, Aug 1, 2019)
7b8eac9  [WIP] Metric with all reduce decorator and tests (vfdev-5, Aug 1, 2019)
c7d2337  [WIP] Added tests for accumulation metric (vfdev-5, Aug 1, 2019)
69ced1e  [WIP] Updated with reinit_is_reduced (vfdev-5, Aug 1, 2019)
f2f923b  [WIP] Distrib adaptation for other metrics (vfdev-5, Aug 2, 2019)
d13b985  [WIP] Warnings for EpochMetric and Precision/Recall when distrib (vfdev-5, Aug 2, 2019)
e7d12d0  Updated metrics and tests to run on distributed configuration (vfdev-5, Aug 3, 2019)
0a5f582  Minor fixes and cosmetics (vfdev-5, Aug 3, 2019)
954269c  Merge branch 'master' into distrib (vfdev-5, Aug 3, 2019)
206f2e1  Fixed bugs and improved contrib/cifar10 example (vfdev-5, Aug 3, 2019)
99a6b4a  Updated docs (vfdev-5, Aug 3, 2019)
3eff370  Update metrics.rst (vfdev-5, Aug 6, 2019)
ad8375c  Updated docs and set device as "cuda" in distributed instead of raising error (vfdev-5, Aug 6, 2019)
0bcc287  [WIP] Fix missing _is_reduced in precision/recall with tests (vfdev-5, Aug 7, 2019)
1bda698  Merge remote-tracking branch 'origin' into distrib (vfdev-5, Aug 7, 2019)
7dd6937  Updated other tests (vfdev-5, Aug 7, 2019)
27324dc  Merge branch 'master' into distrib (vfdev-5, Aug 29, 2019)
f4a3d4b  Updated travis and renamed tbptt test gpu -> cuda (vfdev-5, Aug 29, 2019)
2036075  Distrib (#573) (vfdev-5, Aug 30, 2019)
69502fc  Merge branch 'distrib' of https://github.com/pytorch/ignite into distrib (vfdev-5, Sep 9, 2019)
d52c36d  Merge branch 'master' into distrib (vfdev-5, Sep 9, 2019)
ecb00a5  Merge branch 'master' into distrib (vfdev-5, Sep 13, 2019)
71836aa  Merge branch 'master' into distrib (vfdev-5, Sep 25, 2019)
46cdd86  Compute IoU, Precision, Recall based on CM on CPU (vfdev-5, Sep 26, 2019)
fd14d4d  Fixes incomplete merge with 1856c8e0f1be102d4530592bcb7caac690f198c4 (vfdev-5, Sep 26, 2019)
59b894c  Merge branch 'master' into distrib (vfdev-5, Oct 17, 2019)
80ad40a  Update distrib branch and CIFAR10 example (#647) (vfdev-5, Oct 22, 2019)
8288831  Finalized Cifar10 example (#649) (vfdev-5, Oct 24, 2019)
25db95b  Merge branch 'master' into distrib (vfdev-5, Oct 24, 2019)
Updated docs and set device as "cuda" in distributed instead of raising error
vfdev-5 committed Aug 6, 2019
commit ad8375c27644acac46b8a6ff9de1ea2bc022762c
4 changes: 2 additions & 2 deletions examples/contrib/cifar10/main.py
@@ -37,8 +37,8 @@ def run(output_path, config):
 
     distributed = backend is not None
     if distributed:
-        torch.cuda.device(config['local_rank'])
-        device = "cuda:{}".format(config['local_rank'])
+        torch.cuda.set_device(config['local_rank'])
+        device = "cuda"
 
     train_labelled_loader, test_loader = \
         get_train_test_loaders(path=config['data_path'],
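
For context, a minimal sketch of the launch pattern this change relies on. This is an assumption-laden illustration, not taken from the example's config: it presumes one process per GPU started with something like `python -m torch.distributed.launch --nproc_per_node=N main.py` and an NCCL backend.

    # Sketch only: bind each launched process to its own GPU so that a plain
    # "cuda" device string afterwards resolves to the right card.
    import argparse

    import torch
    import torch.distributed as dist

    parser = argparse.ArgumentParser()
    parser.add_argument("--local_rank", type=int, default=0)  # injected by torch.distributed.launch
    args = parser.parse_args()

    dist.init_process_group(backend="nccl", init_method="env://")
    torch.cuda.set_device(args.local_rank)  # counterpart of the set_device call added above
    device = "cuda"                         # now implicitly means "cuda:<local_rank>"
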
6 changes: 4 additions & 2 deletions ignite/metrics/accumulation.py
@@ -29,8 +29,10 @@ class VariableAccumulation(Metric):
             :class:`~ignite.engine.Engine`'s `process_function`'s output into the
             form expected by the metric. This can be useful if, for example, you have a multi-output model and
             you want to compute the metric with respect to one of the outputs.
-        device (str of torch.device): device specification in case of distributed computation usage.
-            In most of the cases, it should defined as "cuda:local_rank".
+        device (str of torch.device, optional): device specification in case of distributed computation usage.
+            In most of the cases, it can be defined as "cuda:local_rank" or "cuda"
+            if already set `torch.cuda.set_device(local_rank)`. By default, if a distributed process group is
+            initialized and available, device is set to `cuda`.
 
     """
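
A hedged usage sketch of the two documented forms of the `device` argument; `local_rank` is a placeholder here and would normally come from the distributed launcher.

    import torch
    from ignite.metrics import VariableAccumulation

    local_rank = 0  # placeholder; normally provided by the launcher

    # Explicit per-process device:
    acc = VariableAccumulation(lambda a, x: a + x, device="cuda:{}".format(local_rank))

    # Equivalent shorthand once the current device has been set for this process:
    torch.cuda.set_device(local_rank)
    acc_short = VariableAccumulation(lambda a, x: a + x, device="cuda")
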
6 changes: 4 additions & 2 deletions ignite/metrics/accuracy.py
@@ -109,8 +109,10 @@ def thresholded_output_transform(output):
             form expected by the metric. This can be useful if, for example, you have a multi-output model and
             you want to compute the metric with respect to one of the outputs.
         is_multilabel (bool, optional): flag to use in multilabel case. By default, False.
-        device (str of torch.device): device specification in case of distributed computation usage.
-            In most of the cases, it should defined as "cuda:local_rank".
+        device (str of torch.device, optional): device specification in case of distributed computation usage.
+            In most of the cases, it can be defined as "cuda:local_rank" or "cuda"
+            if already set `torch.cuda.set_device(local_rank)`. By default, if a distributed process group is
+            initialized and available, device is set to `cuda`.
 
     """
6 changes: 4 additions & 2 deletions ignite/metrics/confusion_matrix.py
@@ -28,8 +28,10 @@ class ConfusionMatrix(Metric):
             :class:`~ignite.engine.Engine`'s `process_function`'s output into the
             form expected by the metric. This can be useful if, for example, you have a multi-output model and
             you want to compute the metric with respect to one of the outputs.
-        device (str of torch.device): device specification in case of distributed computation usage.
-            In most of the cases, it should defined as "cuda:local_rank".
+        device (str of torch.device, optional): device specification in case of distributed computation usage.
+            In most of the cases, it can be defined as "cuda:local_rank" or "cuda"
+            if already set `torch.cuda.set_device(local_rank)`. By default, if a distributed process group is
+            initialized and available, device is set to `cuda`.
 
     """
6 changes: 4 additions & 2 deletions ignite/metrics/loss.py
@@ -23,8 +23,10 @@ class Loss(Metric):
             keywords arguments.
         batch_size (callable): a callable taking a target tensor that returns the
             first dimension size (usually the batch size).
-        device (str of torch.device): device specification in case of distributed computation usage.
-            In most of the cases, it should defined as "cuda:local_rank".
+        device (str of torch.device, optional): device specification in case of distributed computation usage.
+            In most of the cases, it can be defined as "cuda:local_rank" or "cuda"
+            if already set `torch.cuda.set_device(local_rank)`. By default, if a distributed process group is
+            initialized and available, device is set to `cuda`.
 
     """
9 changes: 5 additions & 4 deletions ignite/metrics/metric.py
@@ -22,8 +22,10 @@ class Metric(with_metaclass(ABCMeta, object)):
             :class:`~ignite.engine.Engine`'s `process_function`'s output into the
             form expected by the metric. This can be useful if, for example, you have a multi-output model and
             you want to compute the metric with respect to one of the outputs.
-        device (str of torch.device): device specification in case of distributed computation usage.
-            In most of the cases, it should defined as "cuda:local_rank".
+        device (str of torch.device, optional): device specification in case of distributed computation usage.
+            In most of the cases, it can be defined as "cuda:local_rank" or "cuda"
+            if already set `torch.cuda.set_device(local_rank)`. By default, if a distributed process group is
+            initialized and available, device is set to `cuda`.
 
     """
 
@@ -33,8 +35,7 @@ def __init__(self, output_transform=lambda x: x, device=None):
         # Check device if distributed is initialized:
         if torch.distributed.is_available() and torch.distributed.is_initialized():
             if device is None:
-                raise ValueError("Please provide the device for distributed computation. "
-                                 "In most of the cases, it should defined as 'cuda:local_rank'.")
+                device = "cuda"
             device = torch.device(device)
         self._device = device
         self._is_reduced = False
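
A short sketch of what the `__init__` change means in practice, assuming a distributed process group is already initialized; `Accuracy` is used here only as a convenient concrete `Metric` subclass.

    import torch.distributed as dist
    from ignite.metrics import Accuracy

    assert dist.is_available() and dist.is_initialized()

    # Before this commit: ValueError asking for an explicit device.
    # After this commit: the metric falls back to torch.device("cuda"), i.e. the
    # device previously selected via torch.cuda.set_device(local_rank).
    acc = Accuracy()

    # Passing an explicit per-process device still works as before.
    acc_explicit = Accuracy(device="cuda:0")
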
6 changes: 4 additions & 2 deletions ignite/metrics/precision.py
@@ -101,8 +101,10 @@ def thresholded_output_transform(output):
             in multiclass case), otherwise, returns a tensor with the precision (for each class in multiclass case).
         is_multilabel (bool, optional) flag to use in multilabel case. By default, value is False. If True, average
             parameter should be True and the average is computed across samples, instead of classes.
-        device (str of torch.device): device specification in case of distributed computation usage.
-            In most of the cases, it should defined as "cuda:local_rank".
+        device (str of torch.device, optional): device specification in case of distributed computation usage.
+            In most of the cases, it can be defined as "cuda:local_rank" or "cuda"
+            if already set `torch.cuda.set_device(local_rank)`. By default, if a distributed process group is
+            initialized and available, device is set to `cuda`.
 
     """
6 changes: 4 additions & 2 deletions ignite/metrics/recall.py
@@ -58,8 +58,10 @@ def thresholded_output_transform(output):
             in multiclass case), otherwise, returns a tensor with the precision (for each class in multiclass case).
         is_multilabel (bool, optional) flag to use in multilabel case. By default, value is False. If True, average
             parameter should be True and the average is computed across samples, instead of classes.
-        device (str of torch.device): device specification in case of distributed computation usage.
-            In most of the cases, it should defined as "cuda:local_rank".
+        device (str of torch.device, optional): device specification in case of distributed computation usage.
+            In most of the cases, it can be defined as "cuda:local_rank" or "cuda"
+            if already set `torch.cuda.set_device(local_rank)`. By default, if a distributed process group is
+            initialized and available, device is set to `cuda`.
 
     """
17 changes: 0 additions & 17 deletions tests/ignite/metrics/test_metric.py
@@ -469,27 +469,10 @@ def test__sync_all_reduce():
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
 def test_distrib(local_rank, distributed_context_single_node):
 
-    def test_distrib_no_device_metric():
-        import torch.distributed as dist
-        assert dist.is_available() and dist.is_initialized()
-
-        with pytest.raises(ValueError, match=r"Please provide the device for distributed computation."):
-            DummyMetric()
-
-    test_distrib_no_device_metric()
-
     def test_distrib__sync_all_reduce():
         import torch.distributed as dist
         assert dist.is_available() and dist.is_initialized()
 
-        # # This test should be the first in the list, otherwise stucked
-        # # The following test aimed to check the transfer from another cuda device to the default one
-        # # However, this test sometimes gets stucked
-        # m = DummyMetric(device="cuda:{}".format(local_rank))
-        # t = torch.tensor(10, device="cuda:1")
-        # res = m._sync_all_reduce(t)
-        # assert res.item() == 10 * dist.get_world_size()
-
         device = "cuda:{}".format(local_rank)
 
         m = DummyMetric(device=device)
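
For reference, a sketch of the all-reduce semantics the surviving test exercises, reconstructed from the removed commented-out block. `_Dummy` is a hypothetical stand-in for the test's `DummyMetric`, and the assumption is that any concrete `Metric` subclass exposes the internal `_sync_all_reduce` helper introduced by this PR.

    import torch
    import torch.distributed as dist
    from ignite.metrics import Metric

    # Hypothetical minimal Metric subclass (DummyMetric lives in the test module).
    class _Dummy(Metric):
        def reset(self): pass
        def update(self, output): pass
        def compute(self): pass

    local_rank = dist.get_rank()  # assumption: single node, one process per GPU
    device = "cuda:{}".format(local_rank)

    m = _Dummy(device=device)
    t = torch.tensor(10, device=device)

    # _sync_all_reduce sums the tensor across all participating processes,
    # so a scalar of 10 becomes 10 * world_size on every rank.
    res = m._sync_all_reduce(t)
    assert res.item() == 10 * dist.get_world_size()
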