
Commit 09ce760

Revert "Add missing data types at torch export serialization (#138561)"
This reverts commit 1ef1b3b. Reverted #138561 on behalf of https://github.com/facebook-github-bot because the diff was reverted internally (see the comment on #138561).
1 parent 4959784 · commit 09ce760
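The reverted change (#138561) had added the two float8 dtypes to the export serde schema; this revert removes them again from the ScalarType enum and the dtype mapping. A minimal sketch of what that implies (hypothetical repro, not part of the commit): serializing an exported program whose graph carries a float8 tensor should once more fail, since the dtype no longer maps to a ScalarType.

# Hypothetical repro sketch (not from the commit): after this revert,
# torch.float8_e5m2 is absent from the dtype -> ScalarType table in
# torch/_export/serde/serialize.py, so saving should fail again.
import torch
from torch.export import export

class M(torch.nn.Module):
    def forward(self, x):
        return x.to(torch.float8_e5m2)  # float8 tensor meta ends up in the graph

ep = export(M(), (torch.randn(4),))
torch.export.save(ep, "m.pt2")  # expected to raise: unmapped float8 dtype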

File tree

6 files changed: +41 -57 lines changed


test/quantization/pt2e/test_quantize_pt2e.py

Lines changed: 34 additions & 42 deletions
@@ -1174,44 +1174,40 @@ def validate(self, model: torch.fx.GraphModule) -> None:
         self.assertIsNot(observers[0], observers[2])
         self.assertIsNot(observers[1], observers[2])
 
-    class DtypeActQuantizer(Quantizer):
-        def __init__(self, quant_dtype, op_name):
-            self.quant_dtype = quant_dtype
-            self.op_name = op_name
-
-        def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
-            quant_dtype = self.quant_dtype
-            info_fun = torch.iinfo if quant_dtype == torch.int16 else torch.finfo
-            activate_qspec = QuantizationSpec(
-                dtype=quant_dtype,
-                quant_min=int(info_fun(quant_dtype).min),
-                quant_max=int(info_fun(quant_dtype).max),
-                qscheme=torch.per_tensor_affine,
-                is_dynamic=False,
-                observer_or_fake_quant_ctr=observer.default_observer,
-            )
-            int8_qspec = QuantizationSpec(
-                dtype=torch.int8,
-                quant_min=-128,
-                quant_max=127,
-                qscheme=torch.per_tensor_symmetric,
-                is_dynamic=False,
-                observer_or_fake_quant_ctr=observer.default_weight_observer,
-            )
-            quantization_config = QuantizationConfig(
-                input_activation=activate_qspec,
-                weight=int8_qspec,
-                bias=None,
-                output_activation=activate_qspec,
-            )
-            OP_TO_ANNOTATOR[self.op_name](model, quantization_config)
-
-        def validate(self, model: torch.fx.GraphModule) -> None:
-            pass
-
+    @skipIfHpu
     @parametrize("dtype", (torch.float32, torch.bfloat16))
     @parametrize("quant_dtype", (torch.int16, torch.float8_e5m2, torch.float8_e4m3fn))
     def test_quantization_dtype(self, dtype, quant_dtype):
+        class DtypeActQuantizer(Quantizer):
+            def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+                info_fun = torch.iinfo if quant_dtype == torch.int16 else torch.finfo
+                activate_qspec = QuantizationSpec(
+                    dtype=quant_dtype,
+                    quant_min=int(info_fun(quant_dtype).min),
+                    quant_max=int(info_fun(quant_dtype).max),
+                    qscheme=torch.per_tensor_affine,
+                    is_dynamic=False,
+                    observer_or_fake_quant_ctr=observer.default_observer,
+                )
+                int8_qspec = QuantizationSpec(
+                    dtype=torch.int8,
+                    quant_min=-128,
+                    quant_max=127,
+                    qscheme=torch.per_tensor_symmetric,
+                    is_dynamic=False,
+                    observer_or_fake_quant_ctr=observer.default_weight_observer,
+                )
+                quantization_config = QuantizationConfig(
+                    input_activation=activate_qspec,
+                    weight=int8_qspec,
+                    bias=None,
+                    output_activation=activate_qspec,
+                )
+                OP_TO_ANNOTATOR["conv"](model, quantization_config)
+
+            def validate(self, model: torch.fx.GraphModule) -> None:
+                pass
+
         class M(torch.nn.Module):
             def __init__(self, dtype):
                 super().__init__()
@@ -1220,7 +1216,7 @@ def __init__(self, dtype):
             def forward(self, x):
                 return self.conv(x)
 
-        quantizer = self.DtypeActQuantizer(quant_dtype=quant_dtype, op_name="conv")
+        quantizer = DtypeActQuantizer()
         node_occurrence = {
             # one for input of the first conv, one for output for the first conv
             torch.ops.quantized_decomposed.quantize_per_tensor.default: 2,
@@ -1456,13 +1452,9 @@ def forward(self, x):
             for key in n.meta:
                 self.assertEqual(n.meta[key], weight_meta[key])
 
-    @parametrize("quant_dtype", (torch.float32, torch.float8_e5m2, torch.float8_e4m3fn))
-    def test_save_load(self, quant_dtype=None):
+    def test_save_load(self):
         """Test save/load a quantized model"""
-        quantizer = None
-        if quant_dtype != torch.float32:
-            quantizer = self.DtypeActQuantizer(quant_dtype=quant_dtype, op_name="conv")
-        m = self._get_pt2e_quantized_linear(quantizer=quantizer)
+        m = self._get_pt2e_quantized_linear()
         example_inputs = (torch.randn(2, 2),)
         ref_res = m(*example_inputs)
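For context, a condensed sketch of how a Quantizer such as the nested DtypeActQuantizer above is typically driven end to end. The helper name run_quantizer and the export call are assumptions mirroring the usual prepare_pt2e/convert_pt2e flow, not this test's literal body:

# Assumed end-to-end pt2e flow (sketch, not the test's exact code):
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

def run_quantizer(model, quantizer, example_inputs):
    # Trace to an FX graph, insert observers per the quantizer's annotations,
    # run one calibration pass, then lower observers to quant/dequant ops.
    m = torch.export.export_for_training(model, example_inputs).module()
    m = prepare_pt2e(m, quantizer)
    m(*example_inputs)
    return convert_pt2e(m)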

torch/_export/serde/schema.py

Lines changed: 0 additions & 2 deletions
@@ -27,8 +27,6 @@ class ScalarType(IntEnum):
     COMPLEXDOUBLE = 11
     BOOL = 12
     BFLOAT16 = 13
-    FLOAT8_E5M2 = 23
-    FLOAT8_E4M3FN = 24
     UINT16 = 28
 
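One practical consequence worth noting (illustrative sketch, not code from the diff): IntEnum lookups by value raise on members that no longer exist, so an artifact serialized with the float8 values cannot be deserialized against the reverted schema.

# Illustrative only: a trimmed stand-in for the serde ScalarType enum.
from enum import IntEnum

class ScalarType(IntEnum):
    BOOL = 12
    BFLOAT16 = 13
    UINT16 = 28

ScalarType(13)  # -> ScalarType.BFLOAT16
ScalarType(23)  # raises ValueError: 23 is not a valid ScalarType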

torch/_export/serde/schema.yaml

Lines changed: 1 addition & 3 deletions
@@ -1,5 +1,5 @@
 # @generated by update_schema.py
-# checksum<<976f1a95674e0e9ca72f7bb7df2e648172aaae0ca43a52f7c6f814c79a96ddf4>>
+# checksum<<19d86105f895a10d5eedbc6e13d4d96cf5d9182c0367d6825ef2438e124cc536>>
 Argument:
   kind: union
   fields:
@@ -338,8 +338,6 @@ ScalarType:
   COMPLEXDOUBLE: 11
   BOOL: 12
   BFLOAT16: 13
-  FLOAT8_E5M2: 23
-  FLOAT8_E4M3FN: 24
   UINT16: 28
 SchemaVersion:
   kind: struct
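The checksum header is 64 hex characters, which is consistent with a SHA-256 digest; as a hedged sketch of how such a header could be recomputed (exactly what update_schema.py hashes, and whether it is SHA-256 at all, is an assumption inferred from the format):

# Sketch only: hashlib is real, but the hashed input is assumed.
import hashlib

def schema_checksum(schema_text: str) -> str:
    return hashlib.sha256(schema_text.encode("utf-8")).hexdigest()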

torch/_export/serde/serialize.py

Lines changed: 0 additions & 2 deletions
@@ -140,8 +140,6 @@ def _reverse_map(d: Dict[Any, Enum]):
     torch.complex128: ScalarType.COMPLEXDOUBLE,
     torch.bool: ScalarType.BOOL,
     torch.bfloat16: ScalarType.BFLOAT16,
-    torch.float8_e4m3fn: ScalarType.FLOAT8_E4M3FN,
-    torch.float8_e5m2: ScalarType.FLOAT8_E5M2,
 }
 
147145

torch/csrc/utils/generated_serialization_types.h

Lines changed: 1 addition & 3 deletions
Some generated files are not rendered by default.

torch/testing/_internal/common_quantization.py

Lines changed: 5 additions & 5 deletions
@@ -1333,18 +1333,18 @@ def _quantize(self, m, quantizer, example_inputs, is_qat: bool = False):
         m = convert_pt2e(m)
         return m
 
-    def _get_pt2e_quantized_linear(self, is_per_channel=False, quantizer=None) -> torch.fx.GraphModule:
+    def _get_pt2e_quantized_linear(self, is_per_channel=False) -> torch.fx.GraphModule:
         class M(torch.nn.Module):
             def __init__(self) -> None:
                 super().__init__()
                 self.linear = torch.nn.Linear(2, 2)
 
             def forward(self, x):
                 return self.linear(x)
-        if quantizer is None:
-            quantizer = XNNPACKQuantizer()
-            operator_config = get_symmetric_quantization_config(is_per_channel=is_per_channel)
-            quantizer.set_global(operator_config)
+
+        quantizer = XNNPACKQuantizer()
+        operator_config = get_symmetric_quantization_config(is_per_channel=is_per_channel)
+        quantizer.set_global(operator_config)
         example_inputs = (torch.randn(2, 2),)
         m = M().eval()
         return self._quantize(m, quantizer, example_inputs)
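After the revert, the helper always builds an XNNPACKQuantizer itself. A standalone sketch of that setup; the import path is an assumption based on where XNNPACKQuantizer lived in this era of the codebase:

# Assumed import path for this vintage of PyTorch; the two calls mirror the
# restored helper body above.
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config(is_per_channel=False))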
