Handle meta tensors in FX quantization (#142262)
Summary:
If the module being quantized contains some meta tensors and some tensors on an actual device, quantization should not fail.

Quantization should also not fail if the new quantized module is created on a meta device.
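
As a minimal sketch of the mixed-device situation (the module and names below are illustrative, not taken from this PR):

import torch
import torch.nn as nn

# Hypothetical module whose parameters are split between a real device
# and the meta device.
class MixedDeviceModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc_cpu = nn.Linear(4, 4)                  # parameters on CPU
        self.fc_meta = nn.Linear(4, 4, device="meta")  # parameters on meta

mod = MixedDeviceModule()

# Before this change, _get_unique_devices_ collected every parameter and
# buffer device, so this set contains both cpu and meta, and
# swap_module's single-device assertion would fail.
all_devices = {p.device for p in mod.parameters()} | {
    p.device for p in mod.buffers()
}

# With the meta filter from this PR, only the real device remains.
real_devices = {p.device for p in mod.parameters() if p.device.type != "meta"} | {
    p.device for p in mod.buffers() if p.device.type != "meta"
}
assert real_devices == {torch.device("cpu")}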

Differential Revision: D66895899

Pull Request resolved: #142262
Approved by: https://github.com/iamzainhuda
kausv authored and pytorchmergebot committed Dec 21, 2024
1 parent daa3ffe commit e97b97a
Showing 1 changed file with 3 additions and 3 deletions.
torch/ao/quantization/quantize.py:

@@ -300,8 +300,8 @@ def insert_activation_post_process(m, special_act_post_process=None):
 
 
 def _get_unique_devices_(module):
-    return {p.device for p in module.parameters()} | {
-        p.device for p in module.buffers()
+    return {p.device for p in module.parameters() if p.device.type != "meta"} | {
+        p.device for p in module.buffers() if p.device.type != "meta"
     }
 
 
@@ -779,7 +779,7 @@ def swap_module(
             # respect device affinity when swapping modules
             devices = _get_unique_devices_(mod)
             assert (
-                len(devices) <= 1
+                len(devices) <= 1 or (len(devices) == 2 and torch.device("meta") in devices)
             ), f"swap_module only works with cpu or single-device CUDA modules, but got devices {devices}"
             device = next(iter(devices)) if len(devices) > 0 else None
             if device:
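
As a quick standalone illustration of the relaxed guard (not code from the PR), the new condition accepts an empty device set, a single real device, or exactly one real device plus meta:

import torch

# Each of these device sets satisfies the new swap_module assertion.
for devices in [
    set(),                                        # module with no parameters
    {torch.device("cpu")},                        # single real device
    {torch.device("cpu"), torch.device("meta")},  # real + meta: newly allowed
]:
    assert len(devices) <= 1 or (
        len(devices) == 2 and torch.device("meta") in devices
    )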

1 comment on commit e97b97a

@pytorchmergebot
Collaborator


Reverted #142262 on behalf of https://github.com/janeyx99 due to this PR broke lint (comment)
