fake_quant cachemask: remove Python bindings (#51878)
Summary:
Pull Request resolved: #51878

`fake_quantize_per_tensor_affine_cachemask` and
`fake_quantize_per_channel_affine_cachemask` are implementation
details of `fake_quantize_per_tensor_affine` and
`fake_quantize_per_channel_affine`, so this change removes their
Python bindings; there is no need to expose them to Python.
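For context, a minimal usage sketch of the two public ops that remain exposed to Python (the `*_cachemask` variants stay internal). The tensor shape and quantization parameters below are illustrative assumptions, not taken from the tests:

```python
import torch

# Per-tensor fake quantization: scale is a Python float, zero_point an int.
X = torch.randn(4, 8)
Y = torch.fake_quantize_per_tensor_affine(X, 0.1, 0, 0, 255)

# Per-channel fake quantization: scale and zero_point are per-channel tensors
# (zero_point as int64, matching the tests touched in this PR), quantized along `axis`.
scale = torch.full((8,), 0.1)
zero_point = torch.zeros(8, dtype=torch.int64)
Y_c = torch.fake_quantize_per_channel_affine(X, scale, zero_point, 1, 0, 255)
```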

Test Plan:
```
python test/test_quantization.py TestFakeQuantize
```

Imported from OSS

Reviewed By: albanD, bugra

Differential Revision: D26314173

fbshipit-source-id: 733c93a3951453e739b6ed46b72fbad2244f6e97
vkuzo committed Feb 10, 2021
1 parent 9e5bcc1 commit f2464dd
Showing 3 changed files with 6 additions and 7 deletions.
8 changes: 4 additions & 4 deletions test/quantization/test_workflow_module.py
@@ -872,7 +872,7 @@ def _test_forward_per_tensor_cachemask_impl(self, device):
         scale, zero_point = float(scale), int(zero_point)
         quant_min, quant_max = obs._calculate_qmin_qmax()
 
-        Y_test, _mask = torch.fake_quantize_per_tensor_affine_cachemask(
+        Y_test = torch.fake_quantize_per_tensor_affine(
             X, scale, zero_point, quant_min, quant_max)
         Y_ref = _fake_quantize_per_tensor_affine_reference(
             X.cpu(), scale, zero_point, quant_min, quant_max).to(device)
@@ -899,7 +899,7 @@ def _test_backward_per_tensor_cachemask_impl(self, device):
         quant_min, quant_max = obs._calculate_qmin_qmax()
 
         # forward pass
-        Y_test, mask = torch.fake_quantize_per_tensor_affine_cachemask(
+        Y_test = torch.fake_quantize_per_tensor_affine(
             X, scale, zero_point, quant_min, quant_max)
         Y_ref = _fake_quantize_per_tensor_affine_reference(
             X.cpu(), scale, zero_point, quant_min, quant_max).to(device)
@@ -1246,7 +1246,7 @@ def _test_forward_per_channel_cachemask_impl(self, device):
 
         Y = _fake_quantize_per_channel_affine_reference(
             X.cpu(), scale.cpu(), zero_point.cpu(), axis, quant_min, quant_max)
-        Y_prime, _mask = torch.fake_quantize_per_channel_affine_cachemask(
+        Y_prime = torch.fake_quantize_per_channel_affine(
             X, scale, zero_point, axis, quant_min, quant_max)
         np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)

@@ -1339,7 +1339,7 @@ def _test_backward_per_channel_cachemask_impl(self, device):
         zero_point = zero_point.to(torch.int64)
         quant_min, quant_max = obs._calculate_qmin_qmax()
         X.requires_grad_()
-        Y_prime, _mask = torch.fake_quantize_per_channel_affine_cachemask(
+        Y_prime = torch.fake_quantize_per_channel_affine(
             X, scale, zero_point, axis, quant_min, quant_max)
         dout = torch.rand(X.shape, dtype=torch.float).to(device)
         dX = _fake_quantize_per_channel_affine_grad_reference(
3 changes: 2 additions & 1 deletion tools/autograd/gen_python_functions.py
@@ -82,7 +82,8 @@
     'set_data',
     '.*_overrideable', # overrideable functions for backend extension
     'data', 'is_leaf', 'output_nr', '_version', 'requires_grad_', 'retain_grad', 'set_',
-    '_fw_primal'
+    '_fw_primal', 'fake_quantize_per_tensor_affine_cachemask',
+    'fake_quantize_per_channel_affine_cachemask',
 ]

# These function signatures are not exposed to Python. Note that this signature
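Adding the two names to this skip list in `tools/autograd/gen_python_functions.py` keeps the code generator from emitting `torch.*` bindings for them. A hedged sanity check, assuming a build that includes this change:

```python
import torch

# The cachemask variants should no longer be reachable from the torch namespace;
# only the public fake-quantize ops remain bound.
assert not hasattr(torch, 'fake_quantize_per_tensor_affine_cachemask')
assert not hasattr(torch, 'fake_quantize_per_channel_affine_cachemask')
assert hasattr(torch, 'fake_quantize_per_tensor_affine')
assert hasattr(torch, 'fake_quantize_per_channel_affine')
```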
2 changes: 0 additions & 2 deletions torch/overrides.py
@@ -391,9 +391,7 @@ def get_testing_overrides() -> Dict[Callable, Callable]:
         torch.exp2: lambda input, out=None: -1,
         torch.expm1: lambda input, out=None: -1,
         torch.fake_quantize_per_channel_affine: lambda input, scale, zero_point, axis, quant_min, quant_max: -1,
-        torch.fake_quantize_per_channel_affine_cachemask: lambda input, scale, zero_point, axis, quant_min, quant_max: -1,
         torch.fake_quantize_per_tensor_affine: lambda input, scale, zero_point, quant_min, quant_max: -1,
-        torch.fake_quantize_per_tensor_affine_cachemask: lambda input, scale, zero_point, quant_min, quant_max: -1,
         torch.fbgemm_linear_fp16_weight: lambda input, packed_weight, bias: -1,
         torch.fbgemm_linear_fp16_weight_fp32_activation: lambda input, packed_weight, bias: -1,
         torch.fbgemm_linear_int8_weight: lambda input, weight, packed, col_offsets, weight_scale, weight_zero_point, bias: -1,
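Since the cachemask ops are no longer part of the public `torch` API, their entries are also dropped from `torch.overrides.get_testing_overrides()`, which maps each overridable public function to a dummy lambda for `__torch_function__` coverage tests. A small sketch, assuming the same build:

```python
import torch
from torch.overrides import get_testing_overrides

# Only the public fake-quantize ops should remain in the override table.
overrides = get_testing_overrides()
assert torch.fake_quantize_per_tensor_affine in overrides
assert torch.fake_quantize_per_channel_affine in overrides
```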
