Commit

override .cuda() to check if model is already quantized (#25166)
ranchlai authored Jul 28, 2023
1 parent c1dba11 commit 2a78720
Showing 1 changed file with 10 additions and 0 deletions: src/transformers/modeling_utils.py
@@ -1912,6 +1912,16 @@ def get_memory_footprint(self, return_buffers=True):
         mem = mem + mem_bufs
         return mem
 
+    def cuda(self, *args, **kwargs):
+        # Checks if the model has been quantized (loaded in 4-bit or 8-bit)
+        if getattr(self, "is_quantized", False):
+            raise ValueError(
+                "Calling `cuda()` is not supported for `4-bit` or `8-bit` quantized models. Please use the model as it is, since the"
+                " model has already been set to the correct devices and cast to the correct `dtype`."
+            )
+        else:
+            return super().cuda(*args, **kwargs)
+
     def to(self, *args, **kwargs):
         # Checks if the model has been loaded in 8-bit
         if getattr(self, "is_quantized", False):
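For context, here is a minimal usage sketch (not part of the commit) showing what this override changes in practice. The checkpoint name `facebook/opt-350m` and the `load_in_8bit=True` flag are illustrative; the snippet assumes `bitsandbytes` and `accelerate` are installed alongside `transformers` and a CUDA GPU is available:

from transformers import AutoModelForCausalLM

# Loading in 8-bit dispatches the weights to the right devices and sets
# `is_quantized` on the model (illustrative checkpoint, not from the commit).
model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m", load_in_8bit=True
)

try:
    model.cuda()  # now raises instead of silently re-moving quantized weights
except ValueError as err:
    print(err)

Before this commit, `model.cuda()` fell through to `nn.Module.cuda()` and attempted to move already-dispatched quantized weights; the override makes the failure explicit, mirroring the check that `to()` already performs (visible in the diff context above).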
