From 7608e7c90df931a8daeababf5239f24003a9dec5 Mon Sep 17 00:00:00 2001
From: Harish <harish.harish@amd.com>
Date: Thu, 15 Feb 2024 11:29:36 +0000
Subject: [PATCH 1/7] inference cost breakdown

---
 src/qonnx/analysis/inference_cost.py          | 25 ++++--
 src/qonnx/util/inference_cost.py              | 76 +++++++++++-----
 .../analysis/test_inference_cost_breakdown.py | 88 +++++++++++++++++++
 3 files changed, 160 insertions(+), 29 deletions(-)
 create mode 100644 tests/analysis/test_inference_cost_breakdown.py

diff --git a/src/qonnx/analysis/inference_cost.py b/src/qonnx/analysis/inference_cost.py
index 98e03428..847058b7 100644
--- a/src/qonnx/analysis/inference_cost.py
+++ b/src/qonnx/analysis/inference_cost.py
@@ -201,10 +201,10 @@ def inference_cost_upsample(model, node, discount_sparsity):
     return ret
 
 
-def inference_cost(model, discount_sparsity=True):
+def inference_cost(model, discount_sparsity=True, cost_breakdown=False):
     "Ensure all nodes have unique names prior to calling this analysis pass."
 
-    node_costs = {}
+    ret, node_costs, nodes_per_optype = {}, {}, {}
     zero_cost_ops = [
         "MaxPool",
         "AveragePool",
@@ -240,13 +240,24 @@ def inference_cost(model, discount_sparsity=True):
         if node.op_type in inference_cost_fxn_map.keys():
             node_cost = inference_cost_fxn_map[node.op_type](model, node, discount_sparsity)
             node_costs[node.name] = node_cost
+            if node.op_type not in nodes_per_optype.keys():
+                new_optype = {}
+                new_optype[node.name] = node_cost
+                nodes_per_optype[node.op_type] = new_optype
+            else:
+                nodes_per_optype[node.op_type][node.name] = node_cost
         elif node.op_type in zero_cost_ops:
             continue
         else:
             unsupported_ops.add(node.op_type)
-
-    ret = aggregate_dict_keys(node_costs)
-    ret["unsupported"] = unsupported_ops
-    ret["discount_sparsity"] = discount_sparsity
-
+    total = aggregate_dict_keys(node_costs)
+    total["unsupported"] = unsupported_ops
+    total["discount_sparsity"] = discount_sparsity
+    ret["total_cost"] = total
+    if cost_breakdown:
+        optype_cost = {}
+        for optype, resources in nodes_per_optype.items():
+            optype_cost[optype] = aggregate_dict_keys(resources)
+        ret["optype_cost"] = optype_cost
+        ret["node_cost"] = node_costs
     return ret
diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py
index 86428c76..22ee140d 100644
--- a/src/qonnx/util/inference_cost.py
+++ b/src/qonnx/util/inference_cost.py
@@ -71,7 +71,13 @@ def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"):
 
 
 def inference_cost(
-    model_filename_or_wrapper, *, output_json=None, output_onnx=None, preprocess=True, discount_sparsity=True
+    model_filename_or_wrapper,
+    *,
+    output_json=None,
+    output_onnx=None,
+    preprocess=True,
+    discount_sparsity=True,
+    cost_breakdown=False
 ):
     """Return the inference cost estimate metric for given ONNX model.
     Supports the Quant op for weight/activation quantization.
@@ -83,8 +89,8 @@ def inference_cost(
     :param preprocess: If set, run preprocessing steps such as shape inference,
         datatype inference and constant folding. Strongly recommended.
     :param discount_sparsity: If set, will discount op cost of MAC ops with a
-        constant zero weight, and the mem cost of constant zero weights.
-    """
+        constant zero weight, and the mem cost of constant zero weights."""
+    combined_results = {}
     if isinstance(model_filename_or_wrapper, ModelWrapper):
         model = model_filename_or_wrapper
     else:
@@ -104,25 +110,51 @@ def inference_cost(
     model = model.transform(GiveReadableTensorNames())
     if output_onnx is not None:
         model.save(output_onnx)
-    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity))
-    bops, macs = compute_bops_and_macs(ret)
-    mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(ret, "mem_w")
-    mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(ret, "mem_o")
-    ret["total_bops"] = bops
-    ret["total_macs"] = macs
-    ret["total_mem_w_bits"] = mem_w_bits
-    ret["total_mem_w_elems"] = mem_w_elems
-    ret["total_mem_o_bits"] = mem_o_bits
-    ret["total_mem_o_elems"] = mem_o_elems
-
-    if "unsupported" in ret:
-        ret["unsupported"] = str(ret["unsupported"])
-
-    if output_json is not None:
-        with open(output_json, "w") as f:
-            json.dump(ret, f, sort_keys=True, indent=2)
-
-    return ret
+    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity, cost_breakdown))
+    for i, res in ret.items():
+        if i == "total_cost":
+            bops, macs = compute_bops_and_macs(res)
+            mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res, "mem_w")
+            mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res, "mem_o")
+            res["total_bops"] = bops
+            res["total_macs"] = macs
+            res["total_mem_w_bits"] = mem_w_bits
+            res["total_mem_w_elems"] = mem_w_elems
+            res["total_mem_o_bits"] = mem_o_bits
+            res["total_mem_o_elems"] = mem_o_elems
+            if "unsupported" in res:
+                res["unsupported"] = str(res["unsupported"])
+            if output_json is not None:
+                with open(output_json, "w") as f:
+                    json.dump(res, f, sort_keys=True, indent=2)
+            combined_results[i] = res
+        elif i == "optype_cost":
+            per_optype_breakdown = {}
+            for optype, op_res in res.items():
+                bops, macs = compute_bops_and_macs(op_res)
+                mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(op_res, "mem_w")
+                mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(op_res, "mem_o")
+                op_res["total_bops"] = bops
+                op_res["total_macs"] = macs
+                op_res["total_mem_w_bits"] = mem_w_bits
+                op_res["total_mem_w_elems"] = mem_w_elems
+                op_res["total_mem_o_bits"] = mem_o_bits
+                op_res["total_mem_o_elems"] = mem_o_elems
+                per_optype_breakdown[optype] = op_res
+            combined_results[i] = per_optype_breakdown
+        else:
+            per_node_breakdown = {}
+            for node_name in res.keys():
+                node_cost = res[node_name]
+                mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(node_cost, "mem_w")
+                mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(node_cost, "mem_o")
+                node_cost["total_mem_w_bits"] = mem_w_bits
+                node_cost["total_mem_w_elems"] = mem_w_elems
+                node_cost["total_mem_o_bits"] = mem_o_bits
+                node_cost["total_mem_o_elems"] = mem_o_elems
+                per_node_breakdown[node_name] = node_cost
+            combined_results[i] = per_node_breakdown
+    return combined_results
 
 
 def main():
diff --git a/tests/analysis/test_inference_cost_breakdown.py b/tests/analysis/test_inference_cost_breakdown.py
new file mode 100644
index 00000000..b798eaf0
--- /dev/null
+++ b/tests/analysis/test_inference_cost_breakdown.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2024 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of qonnx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import os
+import urllib.request
+
+from qonnx.analysis.inference_cost import aggregate_dict_keys
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.cleanup import cleanup
+from qonnx.util.inference_cost import inference_cost as infca
+
+download_url = "https://github.com/onnx/models/raw/main/validated/vision/"
+download_url += "classification/resnet/model/resnet18-v1-7.onnx?download="
+
+model_details = {
+    "resnet18-v1-7": {
+        "description": "Resnet18 Opset version 7.",
+        "url": download_url,
+        "enc": {
+            "a": "op_mac_FLOAT32_FLOAT32",
+            "b": "total_mem_w_bits",
+            "c": "total_mem_w_elems",
+            "d": "total_mem_o_bits",
+            "e": "total_mem_o_elems",
+        },
+    },
+}
+
+
+def download_model(test_model, do_cleanup=False, return_modelwrapper=False):
+    qonnx_url = model_details[test_model]["url"]
+    # download test data
+    dl_dir = "/tmp"
+    dl_file = dl_dir + f"/{test_model}.onnx"
+    ret = dl_file
+    if not os.path.isfile(dl_file):
+        urllib.request.urlretrieve(qonnx_url, dl_file)
+    if do_cleanup:
+        out_file = dl_dir + f"/{test_model}_clean.onnx"
+        cleanup(dl_file, out_file=out_file, override_inpsize=1)
+        ret = out_file
+    if return_modelwrapper:
+        ret = ModelWrapper(ret)
+    return ret
+
+
+@pytest.mark.parametrize("test_model", model_details.keys())
+def test_inference_cost_breakdown(test_model):
+    test_details = model_details[test_model]
+    model = download_model(test_model, do_cleanup=True, return_modelwrapper=True)
+    inf_cost = infca(model, discount_sparsity=False, cost_breakdown=True)
+    print(inf_cost.keys())
+    t_cost = inf_cost["total_cost"]  # total cost
+    op_cost = aggregate_dict_keys(inf_cost["optype_cost"])  # cost per optype
+    n_cost = aggregate_dict_keys(inf_cost["node_cost"])  # cost per node.
+    enc = test_details["enc"]
+    assert t_cost[enc["a"]] == op_cost[enc["a"]] == n_cost[enc["a"]], "inf discrepancy"
+    assert t_cost[enc["b"]] == op_cost[enc["b"]] == n_cost[enc["b"]], "inf discrepancy"
+    assert t_cost[enc["c"]] == op_cost[enc["c"]] == n_cost[enc["c"]], "inf discrepancy"
+    assert t_cost[enc["d"]] == op_cost[enc["d"]] == n_cost[enc["d"]], "inf discrepancy"
+    assert t_cost[enc["e"]] == op_cost[enc["e"]] == n_cost[enc["e"]], "inf discrepancy"

From d1207422626d70084e18c4a526dd43e440ac2825 Mon Sep 17 00:00:00 2001
From: Harish <62412574+Harsh9650@users.noreply.github.com>
Date: Thu, 15 Feb 2024 12:06:35 +0000
Subject: [PATCH 2/7] Update inference_cost.py

---
 src/qonnx/util/inference_cost.py | 34 ++++++++++++--------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py
index 22ee140d..7a212321 100644
--- a/src/qonnx/util/inference_cost.py
+++ b/src/qonnx/util/inference_cost.py
@@ -69,6 +69,14 @@ def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"):
             total_mem_elems += v
     return total_mem_bits, total_mem_elems
 
+def assign_mem_bits_and_elems(res_dict):
+    mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res_dict, "mem_w")
+    mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res_dict, "mem_o")
+    res_dict["total_mem_w_bits"] = mem_w_bits
+    res_dict["total_mem_w_elems"] = mem_w_elems
+    res_dict["total_mem_o_bits"] = mem_o_bits
+    res_dict["total_mem_o_elems"] = mem_o_elems
+    return res_dict
 
 def inference_cost(
     model_filename_or_wrapper,
@@ -114,14 +122,9 @@ def inference_cost(
     for i, res in ret.items():
         if i == "total_cost":
             bops, macs = compute_bops_and_macs(res)
-            mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res, "mem_w")
-            mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res, "mem_o")
+            res = assign_mem_bits_and_elems(res)
             res["total_bops"] = bops
             res["total_macs"] = macs
-            res["total_mem_w_bits"] = mem_w_bits
-            res["total_mem_w_elems"] = mem_w_elems
-            res["total_mem_o_bits"] = mem_o_bits
-            res["total_mem_o_elems"] = mem_o_elems
             if "unsupported" in res:
                 res["unsupported"] = str(res["unsupported"])
             if output_json is not None:
@@ -132,31 +135,20 @@ def inference_cost(
             per_optype_breakdown = {}
             for optype, op_res in res.items():
                 bops, macs = compute_bops_and_macs(op_res)
-                mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(op_res, "mem_w")
-                mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(op_res, "mem_o")
+                op_res = assign_mem_bits_and_elems(op_res)
                 op_res["total_bops"] = bops
                 op_res["total_macs"] = macs
-                op_res["total_mem_w_bits"] = mem_w_bits
-                op_res["total_mem_w_elems"] = mem_w_elems
-                op_res["total_mem_o_bits"] = mem_o_bits
-                op_res["total_mem_o_elems"] = mem_o_elems
                 per_optype_breakdown[optype] = op_res
             combined_results[i] = per_optype_breakdown
         else:
             per_node_breakdown = {}
             for node_name in res.keys():
-                node_cost = res[node_name]
-                mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(node_cost, "mem_w")
-                mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(node_cost, "mem_o")
-                node_cost["total_mem_w_bits"] = mem_w_bits
-                node_cost["total_mem_w_elems"] = mem_w_elems
-                node_cost["total_mem_o_bits"] = mem_o_bits
-                node_cost["total_mem_o_elems"] = mem_o_elems
-                per_node_breakdown[node_name] = node_cost
+                node_res = res[node_name]
+                node_res = assign_mem_bits_and_elems(node_res)
+                per_node_breakdown[node_name] = node_res
             combined_results[i] = per_node_breakdown
     return combined_results
 
-
 def main():
     clize.run(inference_cost)
 

From be9a9f838ab7bd9a107ab937d453f64b22da22f4 Mon Sep 17 00:00:00 2001
From: Harish <62412574+Harsh9650@users.noreply.github.com>
Date: Wed, 21 Feb 2024 13:07:52 +0000
Subject: [PATCH 3/7] Update inference_cost.py

---
 src/qonnx/util/inference_cost.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py
index 7a212321..30ac677d 100644
--- a/src/qonnx/util/inference_cost.py
+++ b/src/qonnx/util/inference_cost.py
@@ -44,7 +44,6 @@
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
 
-
 def compute_bops_and_macs(inf_cost_dict):
     total_bops = 0.0
     total_macs = 0.0
@@ -57,7 +56,6 @@ def compute_bops_and_macs(inf_cost_dict):
             total_macs += v
     return total_bops, total_macs
 
-
 def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"):
     total_mem_bits = 0.0
     total_mem_elems = 0.0
@@ -98,6 +96,7 @@ def inference_cost(
         datatype inference and constant folding. Strongly recommended.
     :param discount_sparsity: If set, will discount op cost of MAC ops with a
         constant zero weight, and the mem cost of constant zero weights."""
+    
     combined_results = {}
     if isinstance(model_filename_or_wrapper, ModelWrapper):
         model = model_filename_or_wrapper
@@ -118,7 +117,8 @@ def inference_cost(
     model = model.transform(GiveReadableTensorNames())
     if output_onnx is not None:
         model.save(output_onnx)
-    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity, cost_breakdown))
+    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity,
+                                                        cost_breakdown))
     for i, res in ret.items():
         if i == "total_cost":
             bops, macs = compute_bops_and_macs(res)
@@ -148,10 +148,9 @@ def inference_cost(
                 per_node_breakdown[node_name] = node_res
             combined_results[i] = per_node_breakdown
     return combined_results
-
+    
 def main():
     clize.run(inference_cost)
 
-
 if __name__ == "__main__":
     main()

From 1dfda07eb04924715687e564c6acd28515cca889 Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu <maltanar@gmail.com>
Date: Tue, 21 May 2024 09:21:40 +0200
Subject: [PATCH 4/7] fix linting

---
 src/qonnx/core/datatype.py             |  2 ++
 src/qonnx/util/inference_cost.py       | 13 +++++++++----
 tests/analysis/test_matmul_mac_cost.py | 12 ++++++------
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/qonnx/core/datatype.py b/src/qonnx/core/datatype.py
index 84365289..f37d4eea 100644
--- a/src/qonnx/core/datatype.py
+++ b/src/qonnx/core/datatype.py
@@ -144,6 +144,7 @@ def to_numpy_dt(self):
     def get_canonical_name(self):
         return "FLOAT32"
 
+
 class Float16Type(BaseDataType):
     def bitwidth(self):
         return 16
@@ -175,6 +176,7 @@ def to_numpy_dt(self):
     def get_canonical_name(self):
         return "FLOAT16"
 
+
 class IntType(BaseDataType):
     def __init__(self, bitwidth, signed):
         super().__init__()
diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py
index 30ac677d..57d5292d 100644
--- a/src/qonnx/util/inference_cost.py
+++ b/src/qonnx/util/inference_cost.py
@@ -44,6 +44,7 @@
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
 
+
 def compute_bops_and_macs(inf_cost_dict):
     total_bops = 0.0
     total_macs = 0.0
@@ -56,6 +57,7 @@ def compute_bops_and_macs(inf_cost_dict):
             total_macs += v
     return total_bops, total_macs
 
+
 def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"):
     total_mem_bits = 0.0
     total_mem_elems = 0.0
@@ -67,6 +69,7 @@ def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"):
             total_mem_elems += v
     return total_mem_bits, total_mem_elems
 
+
 def assign_mem_bits_and_elems(res_dict):
     mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res_dict, "mem_w")
     mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res_dict, "mem_o")
@@ -76,6 +79,7 @@ def assign_mem_bits_and_elems(res_dict):
     res_dict["total_mem_o_elems"] = mem_o_elems
     return res_dict
 
+
 def inference_cost(
     model_filename_or_wrapper,
     *,
@@ -96,7 +100,7 @@ def inference_cost(
         datatype inference and constant folding. Strongly recommended.
     :param discount_sparsity: If set, will discount op cost of MAC ops with a
         constant zero weight, and the mem cost of constant zero weights."""
-    
+
     combined_results = {}
     if isinstance(model_filename_or_wrapper, ModelWrapper):
         model = model_filename_or_wrapper
@@ -117,8 +121,7 @@ def inference_cost(
     model = model.transform(GiveReadableTensorNames())
     if output_onnx is not None:
         model.save(output_onnx)
-    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity,
-                                                        cost_breakdown))
+    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity, cost_breakdown))
     for i, res in ret.items():
         if i == "total_cost":
             bops, macs = compute_bops_and_macs(res)
@@ -148,9 +151,11 @@ def inference_cost(
                 per_node_breakdown[node_name] = node_res
             combined_results[i] = per_node_breakdown
     return combined_results
-    
+
+
 def main():
     clize.run(inference_cost)
 
+
 if __name__ == "__main__":
     main()
diff --git a/tests/analysis/test_matmul_mac_cost.py b/tests/analysis/test_matmul_mac_cost.py
index 534618aa..fbdc0d2a 100644
--- a/tests/analysis/test_matmul_mac_cost.py
+++ b/tests/analysis/test_matmul_mac_cost.py
@@ -27,19 +27,19 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
-import pytest
-import qonnx
 from pkgutil import get_data
+
 import qonnx.util.inference_cost as infc
-from qonnx.util.cleanup import cleanup_model
 from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.cleanup import cleanup_model
 
 
 def test_matmul_mac_cost():
-    raw_model = get_data("qonnx","data/onnx/matmul_update/sdp.onnx")
+    raw_model = get_data("qonnx", "data/onnx/matmul_update/sdp.onnx")
     model = ModelWrapper(raw_model)
     cleaned_model = cleanup_model(model)
-    # Two Matmul layers with shape (i_shape, w_shape, o_shape), L1: ([4, 64, 32], [4, 32, 64], [4, 64, 64]) and L2: ([4, 64, 64], [4, 64, 32], [4, 64, 32])
+    # Two Matmul layers with shape (i_shape, w_shape, o_shape),
+    # L1: ([4, 64, 32], [4, 32, 64], [4, 64, 64]) and L2: ([4, 64, 64], [4, 64, 32], [4, 64, 32])
     inf_cost_dict = infc.inference_cost(cleaned_model, discount_sparsity=False)
-    mac_cost = inf_cost_dict['op_mac_FLOAT32_FLOAT32'] # Expected mac cost 4*32*64*64 + 4*64*64*32 = 1048576
+    mac_cost = inf_cost_dict["op_mac_FLOAT32_FLOAT32"]  # Expected mac cost 4*32*64*64 + 4*64*64*32 = 1048576
     assert mac_cost == 1048576.0, "Error: discrepancy in mac cost."

From 2cc6d526d89b6319c376bad6c37d3cb0a896d638 Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu <maltanar@gmail.com>
Date: Tue, 21 May 2024 10:12:15 +0200
Subject: [PATCH 5/7] [InfCost] per-node norm mac/param counts, always floats
 for json

---
 src/qonnx/analysis/inference_cost.py          |  6 +++++
 src/qonnx/util/inference_cost.py              | 25 ++++++++-----------
 .../analysis/test_inference_cost_breakdown.py |  4 ++-
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/qonnx/analysis/inference_cost.py b/src/qonnx/analysis/inference_cost.py
index 847058b7..c821d26a 100644
--- a/src/qonnx/analysis/inference_cost.py
+++ b/src/qonnx/analysis/inference_cost.py
@@ -117,6 +117,8 @@ def inference_cost_conv(model, node, discount_sparsity):
     mac_op_type_str = "op_mac_%s_%s" % (idt_name, wdt_name)
     w_mem_type_str = "mem_w_%s" % (wdt_name)
     o_mem_type_str = "mem_o_%s" % (odt_name)
+    # keep in floats to remain compatible with json serialization
+    n_macs, w_mem, o_mem = float(n_macs), float(w_mem), float(o_mem)
     ret = {mac_op_type_str: n_macs, w_mem_type_str: w_mem, o_mem_type_str: o_mem}
     return ret
 
@@ -161,6 +163,8 @@ def inference_cost_matmul(model, node, discount_sparsity):
     mac_op_type_str = "op_mac_%s_%s" % (idt_name, wdt_name)
     w_mem_type_str = "mem_w_%s" % (wdt_name)
     o_mem_type_str = "mem_o_%s" % (odt_name)
+    # keep in floats to remain compatible with json serialization
+    n_macs, w_mem, o_mem = float(n_macs), float(w_mem), float(o_mem)
     ret = {mac_op_type_str: n_macs, w_mem_type_str: w_mem, o_mem_type_str: o_mem}
     return ret
 
@@ -197,6 +201,8 @@ def inference_cost_upsample(model, node, discount_sparsity):
     mac_op_type_str = "op_mac_%s_%s" % (idt_name, idt_name)
     o_mem_type_str = "mem_o_%s" % (odt_name)
 
+    # keep in floats to remain compatible with json serialization
+    n_macs, o_mem = float(n_macs), float(o_mem)
     ret = {mac_op_type_str: n_macs, o_mem_type_str: o_mem}
     return ret
 
diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py
index 57d5292d..12f1b56d 100644
--- a/src/qonnx/util/inference_cost.py
+++ b/src/qonnx/util/inference_cost.py
@@ -99,7 +99,9 @@ def inference_cost(
     :param preprocess: If set, run preprocessing steps such as shape inference,
         datatype inference and constant folding. Strongly recommended.
     :param discount_sparsity: If set, will discount op cost of MAC ops with a
-        constant zero weight, and the mem cost of constant zero weights."""
+        constant zero weight, and the mem cost of constant zero weights.
+    :param cost_breakdown: If set, include per-node (by name) and per-op-type
+        breakdowns as part of the returned inference cost dict."""
 
     combined_results = {}
     if isinstance(model_filename_or_wrapper, ModelWrapper):
@@ -130,26 +132,19 @@ def inference_cost(
             res["total_macs"] = macs
             if "unsupported" in res:
                 res["unsupported"] = str(res["unsupported"])
-            if output_json is not None:
-                with open(output_json, "w") as f:
-                    json.dump(res, f, sort_keys=True, indent=2)
             combined_results[i] = res
-        elif i == "optype_cost":
-            per_optype_breakdown = {}
+        else:
+            per_optype_or_node_breakdown = {}
             for optype, op_res in res.items():
                 bops, macs = compute_bops_and_macs(op_res)
                 op_res = assign_mem_bits_and_elems(op_res)
                 op_res["total_bops"] = bops
                 op_res["total_macs"] = macs
-                per_optype_breakdown[optype] = op_res
-            combined_results[i] = per_optype_breakdown
-        else:
-            per_node_breakdown = {}
-            for node_name in res.keys():
-                node_res = res[node_name]
-                node_res = assign_mem_bits_and_elems(node_res)
-                per_node_breakdown[node_name] = node_res
-            combined_results[i] = per_node_breakdown
+                per_optype_or_node_breakdown[optype] = op_res
+            combined_results[i] = per_optype_or_node_breakdown
+    if output_json is not None:
+        with open(output_json, "w") as f:
+            json.dump(combined_results, f, sort_keys=True, indent=2)
     return combined_results
 
 
diff --git a/tests/analysis/test_inference_cost_breakdown.py b/tests/analysis/test_inference_cost_breakdown.py
index b798eaf0..afa422b9 100644
--- a/tests/analysis/test_inference_cost_breakdown.py
+++ b/tests/analysis/test_inference_cost_breakdown.py
@@ -76,7 +76,9 @@ def test_inference_cost_breakdown(test_model):
     test_details = model_details[test_model]
     model = download_model(test_model, do_cleanup=True, return_modelwrapper=True)
     inf_cost = infca(model, discount_sparsity=False, cost_breakdown=True)
-    print(inf_cost.keys())
+    assert inf_cost["node_cost"]["Conv_0"]["total_macs"] == 118013952
+    assert inf_cost["node_cost"]["Conv_1"]["total_macs"] == 115605504
+    assert inf_cost["optype_cost"]["Conv"]["total_macs"] == 1813561344
     t_cost = inf_cost["total_cost"]  # total cost
     op_cost = aggregate_dict_keys(inf_cost["optype_cost"])  # cost per optype
     n_cost = aggregate_dict_keys(inf_cost["node_cost"])  # cost per node.

From 4dd2000ef179f0cd0c48a1389ada3e44f87c1550 Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu <maltanar@gmail.com>
Date: Tue, 21 May 2024 10:13:58 +0200
Subject: [PATCH 6/7] update README

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 0e9ff13f..dd9b6c66 100644
--- a/README.md
+++ b/README.md
@@ -101,6 +101,7 @@ Inference cost for CNV_2W2A.onnx
 }
 ```
 
+You can use the `--cost-breakdown` option to generate a more detailed report that covers per-node (by name) and per-op-type information.
 You can read more about the BOPS metric in [this paper](https://www.frontiersin.org/articles/10.3389/frai.2021.676564/full), Section 4.2 Bit Operations.
 
 ### Convert between different quantization representations

From a4e7e35a308535f72176e289d841716629e92bf9 Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu <maltanar@gmail.com>
Date: Tue, 21 May 2024 14:46:50 +0200
Subject: [PATCH 7/7] [Test] fix tests for changed return style of inference cost

---
 src/qonnx/util/inference_cost.py            |   2 +-
 tests/analysis/test_inference_cost.py       | 152 +++++++++++---------
 tests/analysis/test_matmul_mac_cost.py      |   2 +-
 tests/transformation/test_pruning.py        |   4 +-
 tests/transformation/test_quantize_graph.py |   4 +-
 5 files changed, 88 insertions(+), 76 deletions(-)

diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py
index 12f1b56d..8041ecdc 100644
--- a/src/qonnx/util/inference_cost.py
+++ b/src/qonnx/util/inference_cost.py
@@ -133,7 +133,7 @@ def inference_cost(
             if "unsupported" in res:
                 res["unsupported"] = str(res["unsupported"])
             combined_results[i] = res
-        else:
+        elif i in ["optype_cost", "node_cost"]:
             per_optype_or_node_breakdown = {}
             for optype, op_res in res.items():
                 bops, macs = compute_bops_and_macs(op_res)
diff --git a/tests/analysis/test_inference_cost.py b/tests/analysis/test_inference_cost.py
index a94f57f4..572d2e14 100644
--- a/tests/analysis/test_inference_cost.py
+++ b/tests/analysis/test_inference_cost.py
@@ -34,90 +34,102 @@
 model_details_infcost = {
     "FINN-CNV_W2A2": {
         "expected_sparse": {
-            "op_mac_SCALEDINT<8>_INT2": 1345500.0,
-            "mem_w_INT2": 908033.0,
-            "mem_o_SCALEDINT<32>": 57600.0,
-            "op_mac_INT2_INT2": 35615771.0,
-            "mem_o_INT32": 85002.0,
-            "unsupported": "set()",
-            "discount_sparsity": True,
-            "total_bops": 163991084.0,
-            "total_macs": 36961271.0,
-            "total_mem_w_bits": 1816066.0,
-            "total_mem_w_elems": 908033.0,
-            "total_mem_o_bits": 4563264.0,
-            "total_mem_o_elems": 142602.0,
+            "total_cost": {
+                "op_mac_SCALEDINT<8>_INT2": 1345500.0,
+                "mem_w_INT2": 908033.0,
+                "mem_o_SCALEDINT<32>": 57600.0,
+                "op_mac_INT2_INT2": 35615771.0,
+                "mem_o_INT32": 85002.0,
+                "unsupported": "set()",
+                "discount_sparsity": True,
+                "total_bops": 163991084.0,
+                "total_macs": 36961271.0,
+                "total_mem_w_bits": 1816066.0,
+                "total_mem_w_elems": 908033.0,
+                "total_mem_o_bits": 4563264.0,
+                "total_mem_o_elems": 142602.0,
+            }
         },
         "expected_dense": {
-            "op_mac_SCALEDINT<8>_INT2": 1555200.0,
-            "mem_w_INT2": 1542848.0,
-            "mem_o_SCALEDINT<32>": 57600.0,
-            "op_mac_INT2_INT2": 57906176.0,
-            "mem_o_INT32": 85002.0,
-            "unsupported": "set()",
-            "discount_sparsity": False,
-            "total_bops": 256507904.0,
-            "total_macs": 59461376.0,
-            "total_mem_w_bits": 3085696.0,
-            "total_mem_w_elems": 1542848.0,
-            "total_mem_o_bits": 4563264.0,
-            "total_mem_o_elems": 142602.0,
+            "total_cost": {
+                "op_mac_SCALEDINT<8>_INT2": 1555200.0,
+                "mem_w_INT2": 1542848.0,
+                "mem_o_SCALEDINT<32>": 57600.0,
+                "op_mac_INT2_INT2": 57906176.0,
+                "mem_o_INT32": 85002.0,
+                "unsupported": "set()",
+                "discount_sparsity": False,
+                "total_bops": 256507904.0,
+                "total_macs": 59461376.0,
+                "total_mem_w_bits": 3085696.0,
+                "total_mem_w_elems": 1542848.0,
+                "total_mem_o_bits": 4563264.0,
+                "total_mem_o_elems": 142602.0,
+            }
         },
     },
     "FINN-TFC_W2A2": {
         "expected_sparse": {
-            "op_mac_INT2_INT2": 22355.0,
-            "mem_w_INT2": 22355.0,
-            "mem_o_INT32": 202.0,
-            "unsupported": "set()",
-            "discount_sparsity": True,
-            "total_bops": 89420.0,
-            "total_macs": 22355.0,
-            "total_mem_w_bits": 44710.0,
-            "total_mem_w_elems": 22355.0,
-            "total_mem_o_bits": 6464.0,
-            "total_mem_o_elems": 202.0,
+            "total_cost": {
+                "op_mac_INT2_INT2": 22355.0,
+                "mem_w_INT2": 22355.0,
+                "mem_o_INT32": 202.0,
+                "unsupported": "set()",
+                "discount_sparsity": True,
+                "total_bops": 89420.0,
+                "total_macs": 22355.0,
+                "total_mem_w_bits": 44710.0,
+                "total_mem_w_elems": 22355.0,
+                "total_mem_o_bits": 6464.0,
+                "total_mem_o_elems": 202.0,
+            }
         },
         "expected_dense": {
-            "op_mac_INT2_INT2": 59008.0,
-            "mem_w_INT2": 59008.0,
-            "mem_o_INT32": 202.0,
-            "unsupported": "set()",
-            "discount_sparsity": False,
-            "total_bops": 236032.0,
-            "total_macs": 59008.0,
-            "total_mem_w_bits": 118016.0,
-            "total_mem_w_elems": 59008.0,
-            "total_mem_o_bits": 6464.0,
-            "total_mem_o_elems": 202.0,
+            "total_cost": {
+                "op_mac_INT2_INT2": 59008.0,
+                "mem_w_INT2": 59008.0,
+                "mem_o_INT32": 202.0,
+                "unsupported": "set()",
+                "discount_sparsity": False,
+                "total_bops": 236032.0,
+                "total_macs": 59008.0,
+                "total_mem_w_bits": 118016.0,
+                "total_mem_w_elems": 59008.0,
+                "total_mem_o_bits": 6464.0,
+                "total_mem_o_elems": 202.0,
+            }
         },
     },
     "RadioML_VGG10": {
         "expected_sparse": {
-            "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12620311.0,
-            "mem_w_SCALEDINT<8>": 155617.0,
-            "mem_o_SCALEDINT<32>": 130328.0,
-            "unsupported": "set()",
-            "discount_sparsity": True,
-            "total_bops": 807699904.0,
-            "total_macs": 12620311.0,
-            "total_mem_w_bits": 1244936.0,
-            "total_mem_w_elems": 155617.0,
-            "total_mem_o_bits": 4170496.0,
-            "total_mem_o_elems": 130328.0,
+            "total_cost": {
+                "unsupported": "set()",
+                "discount_sparsity": True,
+                "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12620311.0,
+                "mem_w_SCALEDINT<8>": 155617.0,
+                "mem_o_SCALEDINT<32>": 130328.0,
+                "total_bops": 807699904.0,
+                "total_macs": 12620311.0,
+                "total_mem_w_bits": 1244936.0,
+                "total_mem_w_elems": 155617.0,
+                "total_mem_o_bits": 4170496.0,
+                "total_mem_o_elems": 130328.0,
+            }
         },
         "expected_dense": {
-            "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12864512.0,
-            "mem_w_SCALEDINT<8>": 159104.0,
-            "mem_o_SCALEDINT<32>": 130328.0,
-            "unsupported": "set()",
-            "discount_sparsity": False,
-            "total_bops": 823328768.0,
-            "total_macs": 12864512.0,
-            "total_mem_w_bits": 1272832.0,
-            "total_mem_w_elems": 159104.0,
-            "total_mem_o_bits": 4170496.0,
-            "total_mem_o_elems": 130328.0,
+            "total_cost": {
+                "unsupported": "set()",
+                "discount_sparsity": False,
+                "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12864512.0,
+                "mem_w_SCALEDINT<8>": 159104.0,
+                "mem_o_SCALEDINT<32>": 130328.0,
+                "total_bops": 823328768.0,
+                "total_macs": 12864512.0,
+                "total_mem_w_bits": 1272832.0,
+                "total_mem_w_elems": 159104.0,
+                "total_mem_o_bits": 4170496.0,
+                "total_mem_o_elems": 130328.0,
+            }
         },
     },
 }
diff --git a/tests/analysis/test_matmul_mac_cost.py b/tests/analysis/test_matmul_mac_cost.py
index fbdc0d2a..ff7dbc2f 100644
--- a/tests/analysis/test_matmul_mac_cost.py
+++ b/tests/analysis/test_matmul_mac_cost.py
@@ -40,6 +40,6 @@ def test_matmul_mac_cost():
     cleaned_model = cleanup_model(model)
     # Two Matmul layers with shape (i_shape, w_shape, o_shape),
     # L1: ([4, 64, 32], [4, 32, 64], [4, 64, 64]) and L2: ([4, 64, 64], [4, 64, 32], [4, 64, 32])
-    inf_cost_dict = infc.inference_cost(cleaned_model, discount_sparsity=False)
+    inf_cost_dict = infc.inference_cost(cleaned_model, discount_sparsity=False)["total_cost"]
     mac_cost = inf_cost_dict["op_mac_FLOAT32_FLOAT32"]  # Expected mac cost 4*32*64*64 + 4*64*64*32 = 1048576
     assert mac_cost == 1048576.0, "Error: discrepancy in mac cost."
diff --git a/tests/transformation/test_pruning.py b/tests/transformation/test_pruning.py
index 85f9afc9..b2fdbcd8 100644
--- a/tests/transformation/test_pruning.py
+++ b/tests/transformation/test_pruning.py
@@ -90,7 +90,7 @@ def test_pruning_mnv1():
     # do cleanup including folding quantized weights
     model = cleanup_model(model, False)
     inp, golden = get_golden_in_and_output("MobileNetv1-w4a4")
-    cost0 = inference_cost(model, discount_sparsity=False)
+    cost0 = inference_cost(model, discount_sparsity=False)["total_cost"]
     assert cost0["op_mac_SCALEDINT<8>_SCALEDINT<8>"] == 10645344.0
     assert cost0["mem_w_SCALEDINT<8>"] == 864.0
     assert cost0["op_mac_SCALEDINT<4>_SCALEDINT<4>"] == 556357408.0
@@ -105,7 +105,7 @@ def test_pruning_mnv1():
     }
 
     model = model.transform(PruneChannels(prune_spec))
-    cost1 = inference_cost(model, discount_sparsity=False)
+    cost1 = inference_cost(model, discount_sparsity=False)["total_cost"]
     assert cost1["op_mac_SCALEDINT<8>_SCALEDINT<8>"] == 7318674.0
     assert cost1["mem_w_SCALEDINT<8>"] == 594.0
     assert cost1["op_mac_SCALEDINT<4>_SCALEDINT<4>"] == 546053216.0
diff --git a/tests/transformation/test_quantize_graph.py b/tests/transformation/test_quantize_graph.py
index 867f9b34..5278194d 100644
--- a/tests/transformation/test_quantize_graph.py
+++ b/tests/transformation/test_quantize_graph.py
@@ -120,14 +120,14 @@ def to_verify(model, test_details):
 def test_quantize_graph(test_model):
     test_details = model_details[test_model]
     model = download_model(test_model, do_cleanup=True, return_modelwrapper=True)
-    original_model_inf_cost = inference_cost(model, discount_sparsity=False)
+    original_model_inf_cost = inference_cost(model, discount_sparsity=False)["total_cost"]
     nodes_pos = test_details["test_input"]
     model = model.transform(QuantizeGraph(nodes_pos))
     quantnodes_added = len(model.get_nodes_by_op_type("Quant"))
     assert quantnodes_added == 10  # 10 positions are specified.
     verification = to_verify(model, nodes_pos)
     assert verification == "Success"
-    inf_cost = inference_cost(model, discount_sparsity=False)
+    inf_cost = inference_cost(model, discount_sparsity=False)["total_cost"]
     assert (
         inf_cost["total_macs"] == original_model_inf_cost["total_macs"]
     )  # "1814073344.0" must be same as the original model.