Commit: Merge branch 'main' into inference_cost_breakdown
Showing 15 changed files with 1,288 additions and 51 deletions.
Two binary files not shown; two files renamed without changes.
@@ -0,0 +1,142 @@
# Copyright (c) 2023 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of qonnx nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
from onnx import TensorProto, helper

from qonnx.core.modelwrapper import ModelWrapper
from qonnx.transformation.base import Transformation
from qonnx.transformation.general import GiveUniqueParameterTensors, SortGraph
from qonnx.transformation.remove import RemoveIdentityOps


class ExtractQuantScaleZeroPt(Transformation):
    """Extract any non-identity scale and zero-point Quant inputs as
    separate Div/Mul (for scale) and Add/Sub (for zeropoint) nodes,
    preceding and following the Quant node."""

    def apply(self, model: ModelWrapper):
        graph = model.graph
        for node in graph.node:
            if node.op_type == "Quant":
                quant_node = node
                input_nm, scale_nm, zeropt_nm, _ = node.input
                scale_t = model.get_initializer(scale_nm)
                zeropt_t = model.get_initializer(zeropt_nm)
                ishp = model.get_tensor_shape(input_nm)
                extract_scale = False
                extract_zeropt = False
                if scale_t is not None and (scale_t != 1).any():
                    extract_scale = True
                if zeropt_t is not None and (zeropt_t != 0).any():
                    extract_zeropt = True
                if (not extract_scale) and (not extract_zeropt):
                    continue
                running_input = input_nm
                if extract_scale:
                    # create new Div node that divides the input
                    # by the scale
                    inp_scaled_nm = model.make_new_valueinfo_name()
                    inp_scaled = helper.make_tensor_value_info(
                        inp_scaled_nm,
                        TensorProto.FLOAT,
                        ishp,
                    )
                    graph.value_info.append(inp_scaled)
                    inp_scale_node = helper.make_node("Div", [running_input, scale_nm], [inp_scaled_nm])
                    graph.node.append(inp_scale_node)
                    # the matching output-side Mul is created further below;
                    # here, remove the scale from the Quant node itself
                    new_scale_nm = model.make_new_valueinfo_name()
                    model.set_initializer(new_scale_nm, np.asarray(1.0, dtype=np.float32))
                    quant_node.input[1] = new_scale_nm
                    running_input = inp_scaled_nm
                if extract_zeropt:
                    # create new Add node that adds the zeropoint to
                    # the scaled input
                    inp_zeropt_nm = model.make_new_valueinfo_name()
                    inp_zeropt = helper.make_tensor_value_info(
                        inp_zeropt_nm,
                        TensorProto.FLOAT,
                        ishp,
                    )
                    graph.value_info.append(inp_zeropt)
                    inp_zeropt_node = helper.make_node("Add", [running_input, zeropt_nm], [inp_zeropt_nm])
                    graph.node.append(inp_zeropt_node)
                    # remove zeropt from Quant node
                    new_zeropt_nm = model.make_new_valueinfo_name()
                    model.set_initializer(new_zeropt_nm, np.asarray(0.0, dtype=np.float32))
                    quant_node.input[2] = new_zeropt_nm
                    running_input = inp_zeropt_nm
                # rewire node input to any newly created Div/Add nodes
                quant_node.input[0] = running_input
                last_node = quant_node
                final_output = quant_node.output[0]
                if extract_zeropt:
                    # create new Sub node that subtracts the zeropoint from
                    # the output
                    out_zeropt_nm = model.make_new_valueinfo_name()
                    out_zeropt = helper.make_tensor_value_info(
                        out_zeropt_nm,
                        TensorProto.FLOAT,
                        ishp,
                    )
                    graph.value_info.append(out_zeropt)
                    out_zeropt_node = helper.make_node("Sub", [out_zeropt_nm, zeropt_nm], [final_output])
                    last_node.output[0] = out_zeropt_nm
                    graph.node.append(out_zeropt_node)
                    # important: when tracking a pointer to newly added nodes,
                    # ensure the item from the container is used, and not the
                    # make_node result -- those are different objects
                    # e.g. if we use last_node = out_zeropt_node below,
                    # this will point to the wrong object and cause bugs later
                    last_node = graph.node[-1]
                if extract_scale:
                    # create new Mul node that applies the output scale
                    out_scale_nm = model.make_new_valueinfo_name()
                    out_scale = helper.make_tensor_value_info(
                        out_scale_nm,
                        TensorProto.FLOAT,
                        ishp,
                    )
                    last_node.output[0] = out_scale_nm
                    graph.value_info.append(out_scale)
                    out_scale_node = helper.make_node("Mul", [out_scale_nm, scale_nm], [final_output])
                    graph.node.append(out_scale_node)

                if extract_scale or extract_zeropt:
                    # since we used append() for new nodes, need to call
                    # SortGraph to ensure correct (topological) order
                    model = model.transform(SortGraph())
                    # Remove any identity Mul/Div/Add/Sub ops remaining in the graph
                    model = model.transform(RemoveIdentityOps())
                    # Ensure unique parameter tensors
                    model = model.transform(GiveUniqueParameterTensors())
                    return model, True

        return model, False
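
As a usage sketch: the pass plugs into qonnx's standard transform API. The module path and file names below are assumptions for illustration (the commit does not show where the file lives), so adjust the import to the actual location.

from qonnx.core.modelwrapper import ModelWrapper
# assumed module path, based on the class name and qonnx's transformation layout
from qonnx.transformation.extract_quant_scale_zeropt import ExtractQuantScaleZeroPt

# "quant_model.onnx" is a placeholder for any QONNX model containing Quant nodes
model = ModelWrapper("quant_model.onnx")
model = model.transform(ExtractQuantScaleZeroPt())
model.save("quant_model_extracted.onnx")

After the pass, each rewritten Quant node carries scale=1 and zeropoint=0, with the original scale and zero-point applied by the surrounding Div/Add (before the Quant) and Sub/Mul (after it).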
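
The rewrite relies on the identity Quant(x, s, z, b) == s * (Quant(x / s + z, 1, 0, b) - z). Below is a minimal numpy sketch of that identity using a simplified stand-in for the Quant semantics (plain rounding and a signed, non-narrow integer range; the real op also has signed/narrow/rounding_mode attributes), not the qonnx reference implementation.

import numpy as np


def quant_ref(x, scale, zeropt, bitwidth=8):
    # simplified stand-in for Quant: quantize, clip to a signed non-narrow
    # integer range, then dequantize back to float
    q = np.round(x / scale + zeropt)
    q = np.clip(q, -(2 ** (bitwidth - 1)), 2 ** (bitwidth - 1) - 1)
    return (q - zeropt) * scale


x = np.random.randn(8).astype(np.float32)
s, z = 0.05, 3.0
direct = quant_ref(x, s, z)
# extracted form: Div -> Add -> Quant(scale=1, zeropt=0) -> Sub -> Mul
extracted = (quant_ref(x / s + z, 1.0, 0.0) - z) * s
assert np.allclose(direct, extracted)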