Started to add some basic quantized operations: QuantizeLinear, DequantizeLinear, QLinearAdd, QLinearMatMul, QLinearGlobalAveragePool and QLinearConv. Those operations (together with the already implemented ones) should be enough to run 8-bit ResNet-50.
vpisarev · Aug 7, 2022 · 49f2f78 · 49f2f78
1 parent 41dbff2
commit 49f2f78
Show file tree

Hide file tree

Showing 6 changed files with 641 additions and 47 deletions.
diff --git a/lib/NN/Ast.fx b/lib/NN/Ast.fx
@@ -158,14 +158,17 @@ class nnop_t =
         t_passby: int }
     | NN_ConvTranspose: {
         name: string
-        kernel_shape: int []
-        pads: int []
-        strides: int []
-        dilations: int []
+        attr: nnconv_attr_t
         out_shape: int []
         out_padding: int []
-        group: int
         t_inp: int; t_weights: int; t_bias: int; t_out: int }
+    | NN_DequantizeLinear: {
+        name: string
+        axis: int
+        t_inp: int
+        t_scale: int
+        t_zp: int
+        t_out: int }
     | NN_Dropout: {
         name: string; seed: int = 0
         t_inp: int; t_ratio: int; t_training_mode: int; t_out: int }
@@ -220,6 +223,40 @@ class nnop_t =
         t_out: int }
     | NN_NonZero: {
         name: string; t_inp: int; t_out: int }
+    | NN_QLinearConv: {
+        name: string
+        attr: nnconv_attr_t
+        qconv_data: cptr ref
+        t_inp: int; t_weights: int
+        t_bias: int; t_out: int;
+        t_inp_scale: int; t_inp_zp: int;
+        t_w_scale: int; t_w_zp: int;
+        t_out_scale: int; t_out_zp: int }
+    | NN_QLinearAdd: {
+        name: string
+        t_A: int; t_B: int; t_out: int
+        t_A_scale: int; t_A_zp: int
+        t_B_scale: int; t_B_zp: int
+        t_out_scale: int; t_out_zp: int }
+    | NN_QLinearGlobalAvgPool: {
+        name: string
+        channels_last: bool
+        t_inp: int; t_out: int
+        t_inp_scale: int; t_inp_zp: int
+        t_out_scale: int; t_out_zp: int }
+    | NN_QLinearMatMul: {
+        name: string
+        t_A: int; t_B: int; t_out: int
+        t_A_scale: int; t_A_zp: int
+        t_B_scale: int; t_B_zp: int
+        t_out_scale: int; t_out_zp: int }
+    | NN_QuantizeLinear: {
+        name: string
+        axis: int
+        t_inp: int
+        t_scale: int
+        t_zp: int
+        t_out: int }
     | NN_Range: {
         name: string; t_start: int; t_limit: int; t_delta: int; t_out: int }
     | NN_Reduce: {
@@ -814,6 +851,7 @@ fun nnop_t.name(): (string, string) = match self
     | NN_ConstantOfShape {name} => (name, "ConstantOfShape")
     | NN_Conv {name} => (name, "Conv")
     | NN_ConvTranspose {name} => (name, "ConvTranspose")
+    | NN_DequantizeLinear {name} => (name, "DequantizeLinear")
     | NN_Dropout {name} => (name, "Dropout")
     | NN_Elemwise {name, el_op} => (name, string(el_op))
     | NN_Expand {name} => (name, "Expand")
@@ -829,6 +867,11 @@ fun nnop_t.name(): (string, string) = match self
     | NN_MaxPool {name} => (name, "MaxPool")
     | NN_NonMaxSuppression {name} => (name, "NonMaxSuppression")
     | NN_NonZero {name} => (name, "NonZero")
+    | NN_QLinearAdd {name} => (name, "QLinearAdd")
+    | NN_QLinearGlobalAvgPool {name} => (name, "QLinearGlobalAvgPool")
+    | NN_QLinearMatMul {name} => (name, "QLinearMatMul")
+    | NN_QLinearConv {name} => (name, "QLinearConv")
+    | NN_QuantizeLinear {name} => (name, "QuantizeLinear")
     | NN_Range {name} => (name, "Range")
     | NN_Reduce {name, reduce_op} => (name, string(reduce_op))
     | NN_Resize {name} => (name, "Resize")
@@ -844,7 +887,7 @@ fun nnop_t.name(): (string, string) = match self
     | NN_TopK {name} => (name, "TopK")
     | NN_Transpose {name} => (name, "Transpose")
     | NN_Unsqueeze {name} => (name, "Unsqueeze")
-    | NN_Nop => ("", "Nop")
+    | NN_Nop => ("<nop>", "Nop")
 }
 
 fun nnop_t.perf_profile_index(): int = match self {
@@ -873,6 +916,7 @@ fun nnop_t.get_inputs_outputs(): (int [], int []) = match self
     | NN_ConstantOfShape {t_shape, t_out} => ([t_shape], [t_out])
     | NN_Conv {t_inp, t_weights, t_bias, t_out, t_passby} => ([t_inp, t_weights, t_bias, t_passby], [t_out])
     | NN_ConvTranspose {t_inp, t_weights, t_bias, t_out} => ([t_inp, t_weights, t_bias], [t_out])
+    | NN_DequantizeLinear {t_inp, t_scale, t_zp, t_out} => ([t_inp, t_scale, t_zp], [t_out])
     | NN_Dropout {t_inp, t_ratio, t_training_mode, t_out} => ([t_inp, t_ratio, t_training_mode], [t_out])
     | NN_Elemwise {t_inp, t_out} => (t_inp, [t_out])
     | NN_Expand {t_inp, t_shape, t_out} => ([t_inp, t_shape], [t_out])
@@ -891,6 +935,19 @@ fun nnop_t.get_inputs_outputs(): (int [], int []) = match self
         t_iou_threshold, t_score_threshold, t_out} =>
             ([t_boxes, t_scores, t_max_output_boxes_per_class, t_iou_threshold, t_score_threshold], [t_out])
     | NN_NonZero {t_inp, t_out} => ([t_inp], [t_out])
+    | NN_QLinearAdd {t_A, t_A_scale, t_A_zp, t_B, t_B_scale, t_B_zp,
+                    t_out_scale, t_out_zp, t_out} =>
+        ([t_A, t_A_scale, t_A_zp, t_B, t_B_scale, t_B_zp, t_out_scale, t_out_zp], [t_out])
+    | NN_QLinearConv {t_inp, t_weights, t_bias, t_out, t_inp_scale, t_inp_zp,
+                      t_w_scale, t_w_zp, t_out_scale, t_out_zp} =>
+        ([t_inp, t_weights, t_bias, t_inp_scale, t_inp_zp,
+          t_w_scale, t_w_zp, t_out_scale, t_out_zp], [t_out])
+    | NN_QLinearGlobalAvgPool {t_inp, t_out, t_inp_scale, t_inp_zp, t_out_scale, t_out_zp} =>
+        ([t_inp, t_inp_scale, t_inp_zp, t_out_scale, t_out_zp], [t_out])
+    | NN_QLinearMatMul {t_A, t_A_scale, t_A_zp, t_B, t_B_scale, t_B_zp,
+                    t_out_scale, t_out_zp, t_out} =>
+        ([t_A, t_A_scale, t_A_zp, t_B, t_B_scale, t_B_zp, t_out_scale, t_out_zp], [t_out])
+    | NN_QuantizeLinear {t_inp, t_scale, t_zp, t_out} => ([t_inp, t_scale, t_zp], [t_out])
     | NN_Range {t_start, t_limit, t_delta, t_out} => ([t_start, t_limit, t_delta], [t_out])
     | NN_Reduce {t_inp, t_out} => ([t_inp], [t_out])
     | NN_Reshape {t_inp, t_shape, t_out} => ([t_inp, t_shape], [t_out])
@@ -910,6 +967,11 @@ fun nnop_t.get_inputs_outputs(): (int [], int []) = match self
 
 fun op2str(model: nnmodel_t, op: nnop_t, indent: string): string
 {
+    fun conv_attr2str(attr: nnconv_attr_t) = // formats kernel_shape, pads, strides, dilations and group of a convolution as one attribute string for op dumps
+        f"kernel_shape={attr.kernel_shape}, \
+        pads={attr.pads}, strides={attr.strides}, \
+        dilations={attr.dilations}, group={attr.group}"
+
     val sub_indent = indent + "  "
     //println(f"dumping op={op.name()}")
     match op {
@@ -946,14 +1008,18 @@ fun op2str(model: nnmodel_t, op: nnop_t, indent: string): string
             if t_passby > 0 {
                 (" + Add", f", passby=\"{model.args[t_passby].name}\"")
             } else {("", "")}
-        op2str(convname, "Conv" + bnorm_name + passby_name + activ_name, f"kernel_shape={attr.kernel_shape}, \
-            pads={attr.pads}, strides={attr.strides}, dilations={attr.dilations}, group={attr.group}{passby_attr}",
-            t2str(model, [("t_inp", t_inp), ("t_weights", t_weights), ("t_bias", t_bias), ("t_out", t_out)]), indent)
-    | NN_ConvTranspose {name, kernel_shape, pads, strides, dilations, group,
-        out_shape, out_padding, t_inp, t_weights, t_bias, t_out} =>
-        op2str(name, "Conv", f"kernel_shape={kernel_shape}, \
-            pads={pads}, strides={strides}, dilations={dilations}, group={group}, out_padding={out_padding}, out_shape={out_shape}",
+        op2str(convname, "Conv" + bnorm_name + passby_name + activ_name,
+            conv_attr2str(attr) + f"{passby_attr}",
+            t2str(model, [("t_inp", t_inp), ("t_weights", t_weights),
+                ("t_bias", t_bias), ("t_out", t_out)]), indent)
+    | NN_ConvTranspose {name, attr, out_shape, out_padding,
+                        t_inp, t_weights, t_bias, t_out} => // NOTE(review): the dump label below is "Conv" while nnop_t.name() reports "ConvTranspose" - confirm which is intended
+        op2str(name, "Conv", conv_attr2str(attr) + f", out_padding={out_padding}, out_shape={out_shape}", // leading ", " separates these from the trailing group=... of conv_attr2str
             t2str(model, [("t_inp", t_inp), ("t_weights", t_weights), ("t_bias", t_bias), ("t_out", t_out)]), indent)
+    | NN_DequantizeLinear {name, axis, t_inp, t_scale, t_zp, t_out} =>
+        op2str(name, "DequantizeLinear", f"axis={axis}",
+            t2str(model, [("t_inp", t_inp), ("t_scale", t_scale),
+                   ("t_zp", t_zp), ("t_out", t_out)]), indent)
     | NN_Dropout {name, seed, t_inp, t_ratio, t_training_mode, t_out} =>
         op2str(name, "Dropout", f"seed={seed}", t2str(model,
             [("t_inp", t_inp), ("t_ratio", t_ratio), ("t_training_mode", t_training_mode), ("t_out", t_out)]), indent)
@@ -1002,6 +1068,39 @@ fun op2str(model: nnmodel_t, op: nnop_t, indent: string): string
             ("t_iou_threshold", t_iou_threshold), ("t_score_threshold", t_score_threshold), ("t_out", t_out)]), indent)
     | NN_NonZero { name, t_inp, t_out } =>
         op2str(name, "NonZero", "", t2str(model, [("t_inp", t_inp), ("t_out", t_out)]), indent)
+    | NN_QLinearAdd {name, t_A, t_A_scale, t_A_zp, t_B, t_B_scale, t_B_zp,
+                    t_out_scale, t_out_zp, t_out} => // dumps every tensor of the op, including the output scale/zero point
+        op2str(name, "QLinearAdd", "",
+            t2str(model, [("t_A", t_A), ("t_B", t_B),
+                ("t_A_scale", t_A_scale), ("t_A_zp", t_A_zp),
+                ("t_B_scale", t_B_scale), ("t_B_zp", t_B_zp),
+                ("t_out_scale", t_out_scale), ("t_out_zp", t_out_zp), ("t_out", t_out)]), indent)
+    | NN_QLinearConv {name, attr, t_inp, t_weights, t_bias, t_out, t_inp_scale, t_inp_zp,
+                      t_w_scale, t_w_zp, t_out_scale, t_out_zp} =>
+        op2str(name, "QLinearConv", conv_attr2str(attr),
+            t2str(model, [("t_inp", t_inp), ("t_weights", t_weights), ("t_bias", t_bias),
+                ("t_inp_scale", t_inp_scale), ("t_inp_zp", t_inp_zp),
+                ("t_w_scale", t_w_scale), ("t_w_zp", t_w_zp),
+                ("t_out_scale", t_out_scale), ("t_out_zp", t_out_zp),
+                ("t_out", t_out)]), indent)
+    | NN_QLinearGlobalAvgPool {name, channels_last, t_inp, t_out, t_inp_scale,
+                               t_inp_zp, t_out_scale, t_out_zp} =>
+        op2str(name, "QLinearGlobalAvgPool", f"channels_last={channels_last}",
+            t2str(model, [("t_inp", t_inp),
+                ("t_inp_scale", t_inp_scale), ("t_inp_zp", t_inp_zp),
+                ("t_out_scale", t_out_scale), ("t_out_zp", t_out_zp),
+                ("t_out", t_out)]), indent)
+    | NN_QLinearMatMul {name, t_A, t_A_scale, t_A_zp, t_B, t_B_scale, t_B_zp,
+                    t_out_scale, t_out_zp, t_out} => // dumps every tensor of the op, including the output scale/zero point
+        op2str(name, "QLinearMatMul", "",
+            t2str(model, [("t_A", t_A), ("t_B", t_B),
+                ("t_A_scale", t_A_scale), ("t_A_zp", t_A_zp),
+                ("t_B_scale", t_B_scale), ("t_B_zp", t_B_zp),
+                ("t_out_scale", t_out_scale), ("t_out_zp", t_out_zp), ("t_out", t_out)]), indent)
+    | NN_QuantizeLinear {name, axis, t_inp, t_scale, t_zp, t_out} =>
+        op2str(name, "QuantizeLinear", f"axis={axis}",
+            t2str(model, [("t_inp", t_inp), ("t_scale", t_scale),
+                   ("t_zp", t_zp), ("t_out", t_out)]), indent)
     | NN_Range {name, t_start, t_limit, t_delta, t_out} =>
         op2str(name, "Range", "", t2str(model, [("t_start", t_start), ("t_limit", t_limit),
             ("t_delta", t_delta), ("t_out", t_out)]), indent)
@@ -1010,8 +1109,9 @@ fun op2str(model: nnmodel_t, op: nnop_t, indent: string): string
             t2str(model, [("t_inp", t_inp), ("t_out", t_out)]), indent)
     | NN_Resize { name, coord_trans, cubic_coeff_a, exclude_outside, extrapolation_value,
         mode, nearest_mode, t_inp, t_scales, t_sizes, t_roi, t_out } =>
-        val nearest_mode_str = if mode == NN_Inter_Nearest {f", nearest_mode={nearest_mode}"} else {""}
-        val tensors = [:: ("t_out", t_out)]
+        val nearest_mode_str =
+            if mode == NN_Inter_Nearest {f", nearest_mode={nearest_mode}"} else {""}
+        val tensors = ("t_out", t_out) :: []
         val tensors = if coord_trans == NN_CT_TFCropResize {("t_roi", t_roi) :: tensors} else {tensors}
         val tensors = ("t_scales", t_scales) :: ("t_sizes", t_sizes) :: tensors
         op2str(name, "Resize", f"coord_trans={coord_trans}, cubic_coeff_a={cubic_coeff_a},\

diff --git a/lib/NN/FromOnnx.fx b/lib/NN/FromOnnx.fx
@@ -235,6 +235,35 @@ fun convert(onnx_model: OAst.model_t): Ast.nnmodel_t
             find_inp_arg((if pos >= 0 {nspace[:pos]} else {""}), argname)
         }
 
+    fun parse_conv_attr(node: OAst.node_t) { // collects the convolution attributes of 'node'; returns (nnconv_attr_t, out_padding, out_shape)
+        var kernel_shape = [], strides = [], dilations = []
+        var pads = [], auto_pad = Ast.NN_Pad_None, group = 1
+        var out_padding = [], out_shape = [] // remain empty unless the node carries output_padding/output_shape
+        for a <- node.attrs {
+            | {name="kernel_shape"} => kernel_shape = attr2ints(a)
+            | {name="pads"} => pads = attr2ints(a)
+            | {name="strides"} => strides = attr2ints(a)
+            | {name="dilations"} => dilations = attr2ints(a)
+            | {name="group"} => group = attr2int(a)
+            | {name="auto_pad"} => auto_pad = attr2autopad(a)
+            | {name="output_padding"} => out_padding = attr2ints(a) // ONNX names this attribute 'output_padding', not 'out_padding'
+            | {name="output_shape"} => out_shape = attr2ints(a) // ONNX names this attribute 'output_shape', not 'out_shape'
+            | _ => {} // any other attribute is ignored
+        }
+        val dims = size(kernel_shape)
+        if dims == 0 { // kernel_shape attribute is absent or empty
+            throw OnnxConvertError(f"{node.name} (op={node.op}): missing kernel shape")
+        }
+        if pads == [] {pads = array(dims, 0)} // default: 'dims' zeros
+        if strides == [] {strides = array(dims, 1)} // default: 'dims' ones
+        if dilations == [] {dilations = array(dims, 1)} // default: 'dims' ones
+        val pads = autopad2pads(auto_pad, kernel_shape, pads) // final pads are derived from auto_pad, kernel_shape and the explicit/default pads
+        (Ast.nnconv_attr_t {
+            kernel_shape=kernel_shape, pads=pads,
+            strides=strides, dilations=dilations, group = group },
+        out_padding, out_shape)
+    }
+
     fun convert_graph(onnx_graph: OAst.graph_t, nspace: string, nested: bool) {
         val nspace_ = if nspace == "" {nspace} else {nspace + "::"}
         for c <- onnx_graph.initializers {
@@ -508,34 +537,11 @@ fun convert(onnx_model: OAst.model_t): Ast.nnmodel_t
             | "Conv" | "ConvTranspose" =>
                 assert(`ninputs == 2 || ninputs == 3`)
                 assert(`noutputs == 1`)
-                var kernel_shape = [], strides = [], dilations = []
-                var pads = [], auto_pad = Ast.NN_Pad_None, group = 1
-                var out_padding = [], out_shape = []
-                for a <- node.attrs {
-                    | {name="kernel_shape"} => kernel_shape = attr2ints(a)
-                    | {name="pads"} => pads = attr2ints(a)
-                    | {name="strides"} => strides = attr2ints(a)
-                    | {name="dilations"} => dilations = attr2ints(a)
-                    | {name="group"} => group = attr2int(a)
-                    | {name="auto_pad"} => auto_pad = attr2autopad(a)
-                    | {name="out_padding"} => out_padding = attr2ints(a)
-                    | {name="out_shape"} => out_shape = attr2ints(a)
-                    | _ => {}
-                }
-                val dims = size(kernel_shape)
-                if dims == 0 {
-                    throw OnnxConvertError(f"{node.name} (op=Conv): missing kernel shape")
-                }
-                if pads == [] {pads = array(dims, 0)}
-                if strides == [] {strides = array(dims, 1)}
-                if dilations == [] {dilations = array(dims, 1)}
-                val pads = autopad2pads(auto_pad, kernel_shape, pads)
+                val (conv_attr, out_padding, out_shape) = parse_conv_attr(node)
                 if node.op == "Conv" {
                     [:: Ast.NN_Conv {
                         name=name,
-                        attr=Ast.nnconv_attr_t {
-                            kernel_shape=kernel_shape, pads=pads,
-                            strides=strides, dilations=dilations, group = group },
+                        attr=conv_attr,
                         conv_data=ref null,
                         fused_batch_norm=None,
                         non_const_batch_norm=false,
@@ -546,13 +552,26 @@ fun convert(onnx_model: OAst.model_t): Ast.nnmodel_t
                         t_out=outputs[0], t_passby=0}]
                 } else {
                     [:: Ast.NN_ConvTranspose {
-                        name=name, kernel_shape=kernel_shape, pads=pads,
-                        strides=strides, dilations=dilations, group = group,
+                        name=name, attr=conv_attr,
                         out_padding = out_padding, out_shape = out_shape,
                         t_inp=inputs[0], t_weights=inputs[1],
                         t_bias=(if ninputs == 3 {inputs[2]} else {0}),
                         t_out=outputs[0]}]
                 }
+            | "DequantizeLinear" =>
+                assert(`ninputs == 2 || ninputs == 3`)
+                assert(`noutputs == 1`)
+                var axis = 1
+                for a <- node.attrs {
+                    | {name="axis"} => axis = attr2int(a)
+                    | _ => {}
+                }
+                [:: Ast.NN_DequantizeLinear {
+                    name=name, axis=axis,
+                    t_inp=inputs[0],
+                    t_scale=inputs[1],
+                    t_zp=(if ninputs >= 3 {inputs[2]} else {0}),
+                    t_out=outputs[0] }]
             | "Dropout" =>
                 assert(`1 <= ninputs <= 3`)
                 assert(`noutputs == 1 || noutputs == 2`)
@@ -705,6 +724,63 @@ fun convert(onnx_model: OAst.model_t): Ast.nnmodel_t
                 assert(`ninputs == 1`)
                 assert(`noutputs == 1`)
                 [:: Ast.NN_NonZero { name=name, t_inp=inputs[0], t_out=outputs[0] }]
+            | "QLinearAdd" =>
+                assert(`ninputs == 8`)
+                assert(`noutputs == 1`)
+                [:: Ast.NN_QLinearAdd {
+                    name=name,
+                    t_A=inputs[0], t_A_scale=inputs[1], t_A_zp=inputs[2],
+                    t_B=inputs[3], t_B_scale=inputs[4], t_B_zp=inputs[5],
+                    t_out_scale=inputs[6], t_out_zp=inputs[7],
+                    t_out=outputs[0]}]
+            | "QLinearConv" =>
+                assert(`ninputs == 8 || ninputs == 9`) // 8 inputs without bias, 9 with bias
+                assert(`noutputs == 1`)
+                val (conv_attr, _, _) = parse_conv_attr(node) // out_padding/out_shape are not used by this op
+                [:: Ast.NN_QLinearConv {
+                    name=name, attr=conv_attr,
+                    qconv_data=ref null,
+                    t_inp=inputs[0], t_inp_scale=inputs[1], t_inp_zp=inputs[2],
+                    t_weights=inputs[3], t_w_scale=inputs[4], t_w_zp=inputs[5],
+                    t_out_scale=inputs[6], t_out_zp=inputs[7],
+                    t_bias=(if ninputs >= 9 {inputs[8]} else {0}), // inputs[8] exists only when there are 9 inputs; 0 is stored when the bias is absent
+                    t_out=outputs[0]}]
+            | "QLinearGlobalAveragePool" =>
+                assert(`ninputs == 5`)
+                assert(`noutputs == 1`)
+                var channels_last = 0
+                for a <- node.attrs {
+                    | {name="channels_last"} => channels_last = attr2int(a)
+                    | _ => {}
+                }
+                [:: Ast.NN_QLinearGlobalAvgPool {
+                    name=name, channels_last = channels_last != 0,
+                    t_inp=inputs[0], t_inp_scale=inputs[1], t_inp_zp=inputs[2],
+                    t_out_scale=inputs[3], t_out_zp=inputs[4],
+                    t_out=outputs[0]}]
+            | "QLinearMatMul" =>
+                assert(`ninputs == 8`)
+                assert(`noutputs == 1`)
+                [:: Ast.NN_QLinearMatMul {
+                    name=name,
+                    t_A=inputs[0], t_A_scale=inputs[1], t_A_zp=inputs[2],
+                    t_B=inputs[3], t_B_scale=inputs[4], t_B_zp=inputs[5],
+                    t_out_scale=inputs[6], t_out_zp=inputs[7],
+                    t_out=outputs[0]}]
+            | "QuantizeLinear" =>
+                assert(`ninputs == 2 || ninputs == 3`)
+                assert(`noutputs == 1`)
+                var axis = 1
+                for a <- node.attrs {
+                    | {name="axis"} => axis = attr2int(a)
+                    | _ => {}
+                }
+                [:: Ast.NN_QuantizeLinear {
+                    name=name, axis=axis,
+                    t_inp=inputs[0],
+                    t_scale=inputs[1],
+                    t_zp=(if ninputs >= 3 {inputs[2]} else {0}),
+                    t_out=outputs[0] }]
             | "Range" =>
                 assert(`ninputs == 3`)
                 assert(`noutputs == 1`)