From 7608e7c90df931a8daeababf5239f24003a9dec5 Mon Sep 17 00:00:00 2001 From: Harish Date: Thu, 15 Feb 2024 11:29:36 +0000 Subject: [PATCH 1/7] inference cost breakdown --- src/qonnx/analysis/inference_cost.py | 25 ++++-- src/qonnx/util/inference_cost.py | 76 +++++++++++----- .../analysis/test_inference_cost_breakdown.py | 88 +++++++++++++++++++ 3 files changed, 160 insertions(+), 29 deletions(-) create mode 100644 tests/analysis/test_inference_cost_breakdown.py diff --git a/src/qonnx/analysis/inference_cost.py b/src/qonnx/analysis/inference_cost.py index 98e03428..847058b7 100644 --- a/src/qonnx/analysis/inference_cost.py +++ b/src/qonnx/analysis/inference_cost.py @@ -201,10 +201,10 @@ def inference_cost_upsample(model, node, discount_sparsity): return ret -def inference_cost(model, discount_sparsity=True): +def inference_cost(model, discount_sparsity=True, cost_breakdown=False): "Ensure all nodes have unique names prior to calling this analysis pass." - node_costs = {} + ret, node_costs, nodes_per_optype = {}, {}, {} zero_cost_ops = [ "MaxPool", "AveragePool", @@ -240,13 +240,24 @@ def inference_cost(model, discount_sparsity=True): if node.op_type in inference_cost_fxn_map.keys(): node_cost = inference_cost_fxn_map[node.op_type](model, node, discount_sparsity) node_costs[node.name] = node_cost + if node.op_type not in nodes_per_optype.keys(): + new_optype = {} + new_optype[node.name] = node_cost + nodes_per_optype[node.op_type] = new_optype + else: + nodes_per_optype[node.op_type][node.name] = node_cost elif node.op_type in zero_cost_ops: continue else: unsupported_ops.add(node.op_type) - - ret = aggregate_dict_keys(node_costs) - ret["unsupported"] = unsupported_ops - ret["discount_sparsity"] = discount_sparsity - + total = aggregate_dict_keys(node_costs) + total["unsupported"] = unsupported_ops + total["discount_sparsity"] = discount_sparsity + ret["total_cost"] = total + if cost_breakdown: + optype_cost = {} + for optype, resources in nodes_per_optype.items(): + optype_cost[optype] = aggregate_dict_keys(resources) + ret["optype_cost"] = optype_cost + ret["node_cost"] = node_costs return ret diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py index 86428c76..22ee140d 100644 --- a/src/qonnx/util/inference_cost.py +++ b/src/qonnx/util/inference_cost.py @@ -71,7 +71,13 @@ def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"): def inference_cost( - model_filename_or_wrapper, *, output_json=None, output_onnx=None, preprocess=True, discount_sparsity=True + model_filename_or_wrapper, + *, + output_json=None, + output_onnx=None, + preprocess=True, + discount_sparsity=True, + cost_breakdown=False ): """Return the inference cost estimate metric for given ONNX model. Supports the Quant op for weight/activation quantization. @@ -83,8 +89,8 @@ def inference_cost( :param preprocess: If set, run preprocessing steps such as shape inference, datatype inference and constant folding. Strongly recommended. :param discount_sparsity: If set, will discount op cost of MAC ops with a - constant zero weight, and the mem cost of constant zero weights. - """ + constant zero weight, and the mem cost of constant zero weights.""" + combined_results = {} if isinstance(model_filename_or_wrapper, ModelWrapper): model = model_filename_or_wrapper else: @@ -104,25 +110,51 @@ def inference_cost( model = model.transform(GiveReadableTensorNames()) if output_onnx is not None: model.save(output_onnx) - ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity)) - bops, macs = compute_bops_and_macs(ret) - mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(ret, "mem_w") - mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(ret, "mem_o") - ret["total_bops"] = bops - ret["total_macs"] = macs - ret["total_mem_w_bits"] = mem_w_bits - ret["total_mem_w_elems"] = mem_w_elems - ret["total_mem_o_bits"] = mem_o_bits - ret["total_mem_o_elems"] = mem_o_elems - - if "unsupported" in ret: - ret["unsupported"] = str(ret["unsupported"]) - - if output_json is not None: - with open(output_json, "w") as f: - json.dump(ret, f, sort_keys=True, indent=2) - - return ret + ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity, cost_breakdown)) + for i, res in ret.items(): + if i == "total_cost": + bops, macs = compute_bops_and_macs(res) + mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res, "mem_w") + mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res, "mem_o") + res["total_bops"] = bops + res["total_macs"] = macs + res["total_mem_w_bits"] = mem_w_bits + res["total_mem_w_elems"] = mem_w_elems + res["total_mem_o_bits"] = mem_o_bits + res["total_mem_o_elems"] = mem_o_elems + if "unsupported" in res: + res["unsupported"] = str(res["unsupported"]) + if output_json is not None: + with open(output_json, "w") as f: + json.dump(res, f, sort_keys=True, indent=2) + combined_results[i] = res + elif i == "optype_cost": + per_optype_breakdown = {} + for optype, op_res in res.items(): + bops, macs = compute_bops_and_macs(op_res) + mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(op_res, "mem_w") + mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(op_res, "mem_o") + op_res["total_bops"] = bops + op_res["total_macs"] = macs + op_res["total_mem_w_bits"] = mem_w_bits + op_res["total_mem_w_elems"] = mem_w_elems + op_res["total_mem_o_bits"] = mem_o_bits + op_res["total_mem_o_elems"] = mem_o_elems + per_optype_breakdown[optype] = op_res + combined_results[i] = per_optype_breakdown + else: + per_node_breakdown = {} + for node_name in res.keys(): + node_cost = res[node_name] + mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(node_cost, "mem_w") + mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(node_cost, "mem_o") + node_cost["total_mem_w_bits"] = mem_w_bits + node_cost["total_mem_w_elems"] = mem_w_elems + node_cost["total_mem_o_bits"] = mem_o_bits + node_cost["total_mem_o_elems"] = mem_o_elems + per_node_breakdown[node_name] = node_cost + combined_results[i] = per_node_breakdown + return combined_results def main(): diff --git a/tests/analysis/test_inference_cost_breakdown.py b/tests/analysis/test_inference_cost_breakdown.py new file mode 100644 index 00000000..b798eaf0 --- /dev/null +++ b/tests/analysis/test_inference_cost_breakdown.py @@ -0,0 +1,88 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of qonnx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +import os +import urllib.request + +from qonnx.analysis.inference_cost import aggregate_dict_keys +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.util.cleanup import cleanup +from qonnx.util.inference_cost import inference_cost as infca + +download_url = "https://github.com/onnx/models/raw/main/validated/vision/" +download_url += "classification/resnet/model/resnet18-v1-7.onnx?download=" + +model_details = { + "resnet18-v1-7": { + "description": "Resnet18 Opset version 7.", + "url": download_url, + "enc": { + "a": "op_mac_FLOAT32_FLOAT32", + "b": "total_mem_w_bits", + "c": "total_mem_w_elems", + "d": "total_mem_o_bits", + "e": "total_mem_o_elems", + }, + }, +} + + +def download_model(test_model, do_cleanup=False, return_modelwrapper=False): + qonnx_url = model_details[test_model]["url"] + # download test data + dl_dir = "/tmp" + dl_file = dl_dir + f"/{test_model}.onnx" + ret = dl_file + if not os.path.isfile(dl_file): + urllib.request.urlretrieve(qonnx_url, dl_file) + if do_cleanup: + out_file = dl_dir + f"/{test_model}_clean.onnx" + cleanup(dl_file, out_file=out_file, override_inpsize=1) + ret = out_file + if return_modelwrapper: + ret = ModelWrapper(ret) + return ret + + +@pytest.mark.parametrize("test_model", model_details.keys()) +def test_inference_cost_breakdown(test_model): + test_details = model_details[test_model] + model = download_model(test_model, do_cleanup=True, return_modelwrapper=True) + inf_cost = infca(model, discount_sparsity=False, cost_breakdown=True) + print(inf_cost.keys()) + t_cost = inf_cost["total_cost"] # total cost + op_cost = aggregate_dict_keys(inf_cost["optype_cost"]) # cost per optype + n_cost = aggregate_dict_keys(inf_cost["node_cost"]) # cost per node. + enc = test_details["enc"] + assert t_cost[enc["a"]] == op_cost[enc["a"]] == n_cost[enc["a"]], "inf discrepancy" + assert t_cost[enc["b"]] == op_cost[enc["b"]] == n_cost[enc["b"]], "inf discrepancy" + assert t_cost[enc["c"]] == op_cost[enc["c"]] == n_cost[enc["c"]], "inf discrepancy" + assert t_cost[enc["d"]] == op_cost[enc["d"]] == n_cost[enc["d"]], "inf discrepancy" + assert t_cost[enc["e"]] == op_cost[enc["e"]] == n_cost[enc["e"]], "inf discrepancy" From d1207422626d70084e18c4a526dd43e440ac2825 Mon Sep 17 00:00:00 2001 From: Harish <62412574+Harsh9650@users.noreply.github.com> Date: Thu, 15 Feb 2024 12:06:35 +0000 Subject: [PATCH 2/7] Update inference_cost.py --- src/qonnx/util/inference_cost.py | 34 ++++++++++++-------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py index 22ee140d..7a212321 100644 --- a/src/qonnx/util/inference_cost.py +++ b/src/qonnx/util/inference_cost.py @@ -69,6 +69,14 @@ def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"): total_mem_elems += v return total_mem_bits, total_mem_elems +def assign_mem_bits_and_elems(res_dict): + mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res_dict, "mem_w") + mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res_dict, "mem_o") + res_dict["total_mem_w_bits"] = mem_w_bits + res_dict["total_mem_w_elems"] = mem_w_elems + res_dict["total_mem_o_bits"] = mem_o_bits + res_dict["total_mem_o_elems"] = mem_o_elems + return res_dict def inference_cost( model_filename_or_wrapper, @@ -114,14 +122,9 @@ def inference_cost( for i, res in ret.items(): if i == "total_cost": bops, macs = compute_bops_and_macs(res) - mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res, "mem_w") - mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res, "mem_o") + res = assign_mem_bits_and_elems(res) res["total_bops"] = bops res["total_macs"] = macs - res["total_mem_w_bits"] = mem_w_bits - res["total_mem_w_elems"] = mem_w_elems - res["total_mem_o_bits"] = mem_o_bits - res["total_mem_o_elems"] = mem_o_elems if "unsupported" in res: res["unsupported"] = str(res["unsupported"]) if output_json is not None: @@ -132,31 +135,20 @@ def inference_cost( per_optype_breakdown = {} for optype, op_res in res.items(): bops, macs = compute_bops_and_macs(op_res) - mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(op_res, "mem_w") - mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(op_res, "mem_o") + op_res = assign_mem_bits_and_elems(op_res) op_res["total_bops"] = bops op_res["total_macs"] = macs - op_res["total_mem_w_bits"] = mem_w_bits - op_res["total_mem_w_elems"] = mem_w_elems - op_res["total_mem_o_bits"] = mem_o_bits - op_res["total_mem_o_elems"] = mem_o_elems per_optype_breakdown[optype] = op_res combined_results[i] = per_optype_breakdown else: per_node_breakdown = {} for node_name in res.keys(): - node_cost = res[node_name] - mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(node_cost, "mem_w") - mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(node_cost, "mem_o") - node_cost["total_mem_w_bits"] = mem_w_bits - node_cost["total_mem_w_elems"] = mem_w_elems - node_cost["total_mem_o_bits"] = mem_o_bits - node_cost["total_mem_o_elems"] = mem_o_elems - per_node_breakdown[node_name] = node_cost + node_res = res[node_name] + node_res = assign_mem_bits_and_elems(node_res) + per_node_breakdown[node_name] = node_res combined_results[i] = per_node_breakdown return combined_results - def main(): clize.run(inference_cost) From be9a9f838ab7bd9a107ab937d453f64b22da22f4 Mon Sep 17 00:00:00 2001 From: Harish <62412574+Harsh9650@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:07:52 +0000 Subject: [PATCH 3/7] Update inference_cost.py --- src/qonnx/util/inference_cost.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py index 7a212321..30ac677d 100644 --- a/src/qonnx/util/inference_cost.py +++ b/src/qonnx/util/inference_cost.py @@ -44,7 +44,6 @@ from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes - def compute_bops_and_macs(inf_cost_dict): total_bops = 0.0 total_macs = 0.0 @@ -57,7 +56,6 @@ def compute_bops_and_macs(inf_cost_dict): total_macs += v return total_bops, total_macs - def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"): total_mem_bits = 0.0 total_mem_elems = 0.0 @@ -98,6 +96,7 @@ def inference_cost( datatype inference and constant folding. Strongly recommended. :param discount_sparsity: If set, will discount op cost of MAC ops with a constant zero weight, and the mem cost of constant zero weights.""" + combined_results = {} if isinstance(model_filename_or_wrapper, ModelWrapper): model = model_filename_or_wrapper @@ -118,7 +117,8 @@ def inference_cost( model = model.transform(GiveReadableTensorNames()) if output_onnx is not None: model.save(output_onnx) - ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity, cost_breakdown)) + ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity, + cost_breakdown)) for i, res in ret.items(): if i == "total_cost": bops, macs = compute_bops_and_macs(res) @@ -148,10 +148,9 @@ def inference_cost( per_node_breakdown[node_name] = node_res combined_results[i] = per_node_breakdown return combined_results - + def main(): clize.run(inference_cost) - if __name__ == "__main__": main() From 1dfda07eb04924715687e564c6acd28515cca889 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 21 May 2024 09:21:40 +0200 Subject: [PATCH 4/7] fix linting --- src/qonnx/core/datatype.py | 2 ++ src/qonnx/util/inference_cost.py | 13 +++++++++---- tests/analysis/test_matmul_mac_cost.py | 12 ++++++------ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/qonnx/core/datatype.py b/src/qonnx/core/datatype.py index 84365289..f37d4eea 100644 --- a/src/qonnx/core/datatype.py +++ b/src/qonnx/core/datatype.py @@ -144,6 +144,7 @@ def to_numpy_dt(self): def get_canonical_name(self): return "FLOAT32" + class Float16Type(BaseDataType): def bitwidth(self): return 16 @@ -175,6 +176,7 @@ def to_numpy_dt(self): def get_canonical_name(self): return "FLOAT16" + class IntType(BaseDataType): def __init__(self, bitwidth, signed): super().__init__() diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py index 30ac677d..57d5292d 100644 --- a/src/qonnx/util/inference_cost.py +++ b/src/qonnx/util/inference_cost.py @@ -44,6 +44,7 @@ from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes + def compute_bops_and_macs(inf_cost_dict): total_bops = 0.0 total_macs = 0.0 @@ -56,6 +57,7 @@ def compute_bops_and_macs(inf_cost_dict): total_macs += v return total_bops, total_macs + def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"): total_mem_bits = 0.0 total_mem_elems = 0.0 @@ -67,6 +69,7 @@ def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"): total_mem_elems += v return total_mem_bits, total_mem_elems + def assign_mem_bits_and_elems(res_dict): mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res_dict, "mem_w") mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res_dict, "mem_o") @@ -76,6 +79,7 @@ def assign_mem_bits_and_elems(res_dict): res_dict["total_mem_o_elems"] = mem_o_elems return res_dict + def inference_cost( model_filename_or_wrapper, *, @@ -96,7 +100,7 @@ def inference_cost( datatype inference and constant folding. Strongly recommended. :param discount_sparsity: If set, will discount op cost of MAC ops with a constant zero weight, and the mem cost of constant zero weights.""" - + combined_results = {} if isinstance(model_filename_or_wrapper, ModelWrapper): model = model_filename_or_wrapper @@ -117,8 +121,7 @@ def inference_cost( model = model.transform(GiveReadableTensorNames()) if output_onnx is not None: model.save(output_onnx) - ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity, - cost_breakdown)) + ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity, cost_breakdown)) for i, res in ret.items(): if i == "total_cost": bops, macs = compute_bops_and_macs(res) @@ -148,9 +151,11 @@ def inference_cost( per_node_breakdown[node_name] = node_res combined_results[i] = per_node_breakdown return combined_results - + + def main(): clize.run(inference_cost) + if __name__ == "__main__": main() diff --git a/tests/analysis/test_matmul_mac_cost.py b/tests/analysis/test_matmul_mac_cost.py index 534618aa..fbdc0d2a 100644 --- a/tests/analysis/test_matmul_mac_cost.py +++ b/tests/analysis/test_matmul_mac_cost.py @@ -27,19 +27,19 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import pytest -import qonnx from pkgutil import get_data + import qonnx.util.inference_cost as infc -from qonnx.util.cleanup import cleanup_model from qonnx.core.modelwrapper import ModelWrapper +from qonnx.util.cleanup import cleanup_model def test_matmul_mac_cost(): - raw_model = get_data("qonnx","data/onnx/matmul_update/sdp.onnx") + raw_model = get_data("qonnx", "data/onnx/matmul_update/sdp.onnx") model = ModelWrapper(raw_model) cleaned_model = cleanup_model(model) - # Two Matmul layers with shape (i_shape, w_shape, o_shape), L1: ([4, 64, 32], [4, 32, 64], [4, 64, 64]) and L2: ([4, 64, 64], [4, 64, 32], [4, 64, 32]) + # Two Matmul layers with shape (i_shape, w_shape, o_shape), + # L1: ([4, 64, 32], [4, 32, 64], [4, 64, 64]) and L2: ([4, 64, 64], [4, 64, 32], [4, 64, 32]) inf_cost_dict = infc.inference_cost(cleaned_model, discount_sparsity=False) - mac_cost = inf_cost_dict['op_mac_FLOAT32_FLOAT32'] # Expected mac cost 4*32*64*64 + 4*64*64*32 = 1048576 + mac_cost = inf_cost_dict["op_mac_FLOAT32_FLOAT32"] # Expected mac cost 4*32*64*64 + 4*64*64*32 = 1048576 assert mac_cost == 1048576.0, "Error: discrepancy in mac cost." From 2cc6d526d89b6319c376bad6c37d3cb0a896d638 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 21 May 2024 10:12:15 +0200 Subject: [PATCH 5/7] [InfCost] per-node norm mac/param counts, always floats for json --- src/qonnx/analysis/inference_cost.py | 6 +++++ src/qonnx/util/inference_cost.py | 25 ++++++++----------- .../analysis/test_inference_cost_breakdown.py | 4 ++- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/qonnx/analysis/inference_cost.py b/src/qonnx/analysis/inference_cost.py index 847058b7..c821d26a 100644 --- a/src/qonnx/analysis/inference_cost.py +++ b/src/qonnx/analysis/inference_cost.py @@ -117,6 +117,8 @@ def inference_cost_conv(model, node, discount_sparsity): mac_op_type_str = "op_mac_%s_%s" % (idt_name, wdt_name) w_mem_type_str = "mem_w_%s" % (wdt_name) o_mem_type_str = "mem_o_%s" % (odt_name) + # keep in floats to remain compatible with json serialization + n_macs, w_mem, o_mem = float(n_macs), float(w_mem), float(o_mem) ret = {mac_op_type_str: n_macs, w_mem_type_str: w_mem, o_mem_type_str: o_mem} return ret @@ -161,6 +163,8 @@ def inference_cost_matmul(model, node, discount_sparsity): mac_op_type_str = "op_mac_%s_%s" % (idt_name, wdt_name) w_mem_type_str = "mem_w_%s" % (wdt_name) o_mem_type_str = "mem_o_%s" % (odt_name) + # keep in floats to remain compatible with json serialization + n_macs, w_mem, o_mem = float(n_macs), float(w_mem), float(o_mem) ret = {mac_op_type_str: n_macs, w_mem_type_str: w_mem, o_mem_type_str: o_mem} return ret @@ -197,6 +201,8 @@ def inference_cost_upsample(model, node, discount_sparsity): mac_op_type_str = "op_mac_%s_%s" % (idt_name, idt_name) o_mem_type_str = "mem_o_%s" % (odt_name) + # keep in floats to remain compatible with json serialization + n_macs, o_mem = float(n_macs), float(o_mem) ret = {mac_op_type_str: n_macs, o_mem_type_str: o_mem} return ret diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py index 57d5292d..12f1b56d 100644 --- a/src/qonnx/util/inference_cost.py +++ b/src/qonnx/util/inference_cost.py @@ -99,7 +99,9 @@ def inference_cost( :param preprocess: If set, run preprocessing steps such as shape inference, datatype inference and constant folding. Strongly recommended. :param discount_sparsity: If set, will discount op cost of MAC ops with a - constant zero weight, and the mem cost of constant zero weights.""" + constant zero weight, and the mem cost of constant zero weights. + :param cost_breakdown: If set, include per-node (by name) and per-node-type + breakdowns as part of the returned inference cost dict.""" combined_results = {} if isinstance(model_filename_or_wrapper, ModelWrapper): @@ -130,26 +132,19 @@ def inference_cost( res["total_macs"] = macs if "unsupported" in res: res["unsupported"] = str(res["unsupported"]) - if output_json is not None: - with open(output_json, "w") as f: - json.dump(res, f, sort_keys=True, indent=2) combined_results[i] = res - elif i == "optype_cost": - per_optype_breakdown = {} + else: + per_optype_or_node_breakdown = {} for optype, op_res in res.items(): bops, macs = compute_bops_and_macs(op_res) op_res = assign_mem_bits_and_elems(op_res) op_res["total_bops"] = bops op_res["total_macs"] = macs - per_optype_breakdown[optype] = op_res - combined_results[i] = per_optype_breakdown - else: - per_node_breakdown = {} - for node_name in res.keys(): - node_res = res[node_name] - node_res = assign_mem_bits_and_elems(node_res) - per_node_breakdown[node_name] = node_res - combined_results[i] = per_node_breakdown + per_optype_or_node_breakdown[optype] = op_res + combined_results[i] = per_optype_or_node_breakdown + if output_json is not None: + with open(output_json, "w") as f: + json.dump(combined_results, f, sort_keys=True, indent=2) return combined_results diff --git a/tests/analysis/test_inference_cost_breakdown.py b/tests/analysis/test_inference_cost_breakdown.py index b798eaf0..afa422b9 100644 --- a/tests/analysis/test_inference_cost_breakdown.py +++ b/tests/analysis/test_inference_cost_breakdown.py @@ -76,7 +76,9 @@ def test_inference_cost_breakdown(test_model): test_details = model_details[test_model] model = download_model(test_model, do_cleanup=True, return_modelwrapper=True) inf_cost = infca(model, discount_sparsity=False, cost_breakdown=True) - print(inf_cost.keys()) + assert inf_cost["node_cost"]["Conv_0"]["total_macs"] == 118013952 + assert inf_cost["node_cost"]["Conv_1"]["total_macs"] == 115605504 + assert inf_cost["optype_cost"]["Conv"]["total_macs"] == 1813561344 t_cost = inf_cost["total_cost"] # total cost op_cost = aggregate_dict_keys(inf_cost["optype_cost"]) # cost per optype n_cost = aggregate_dict_keys(inf_cost["node_cost"]) # cost per node. From 4dd2000ef179f0cd0c48a1389ada3e44f87c1550 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 21 May 2024 10:13:58 +0200 Subject: [PATCH 6/7] update README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0e9ff13f..dd9b6c66 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,7 @@ Inference cost for CNV_2W2A.onnx } ``` +You can use the `--cost-breakdown` option to generate a more detailed report that covers per-node (by name) and per-op-type information. You can read more about the BOPS metric in [this paper](https://www.frontiersin.org/articles/10.3389/frai.2021.676564/full), Section 4.2 Bit Operations. ### Convert between different quantization representations From a4e7e35a308535f72176e289d841716629e92bf9 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 21 May 2024 14:46:50 +0200 Subject: [PATCH 7/7] [Test] fix changes return style for inference cost --- src/qonnx/util/inference_cost.py | 2 +- tests/analysis/test_inference_cost.py | 152 +++++++++++--------- tests/analysis/test_matmul_mac_cost.py | 2 +- tests/transformation/test_pruning.py | 4 +- tests/transformation/test_quantize_graph.py | 4 +- 5 files changed, 88 insertions(+), 76 deletions(-) diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py index 12f1b56d..8041ecdc 100644 --- a/src/qonnx/util/inference_cost.py +++ b/src/qonnx/util/inference_cost.py @@ -133,7 +133,7 @@ def inference_cost( if "unsupported" in res: res["unsupported"] = str(res["unsupported"]) combined_results[i] = res - else: + elif i in ["optype_cost", "node_cost"]: per_optype_or_node_breakdown = {} for optype, op_res in res.items(): bops, macs = compute_bops_and_macs(op_res) diff --git a/tests/analysis/test_inference_cost.py b/tests/analysis/test_inference_cost.py index a94f57f4..572d2e14 100644 --- a/tests/analysis/test_inference_cost.py +++ b/tests/analysis/test_inference_cost.py @@ -34,90 +34,102 @@ model_details_infcost = { "FINN-CNV_W2A2": { "expected_sparse": { - "op_mac_SCALEDINT<8>_INT2": 1345500.0, - "mem_w_INT2": 908033.0, - "mem_o_SCALEDINT<32>": 57600.0, - "op_mac_INT2_INT2": 35615771.0, - "mem_o_INT32": 85002.0, - "unsupported": "set()", - "discount_sparsity": True, - "total_bops": 163991084.0, - "total_macs": 36961271.0, - "total_mem_w_bits": 1816066.0, - "total_mem_w_elems": 908033.0, - "total_mem_o_bits": 4563264.0, - "total_mem_o_elems": 142602.0, + "total_cost": { + "op_mac_SCALEDINT<8>_INT2": 1345500.0, + "mem_w_INT2": 908033.0, + "mem_o_SCALEDINT<32>": 57600.0, + "op_mac_INT2_INT2": 35615771.0, + "mem_o_INT32": 85002.0, + "unsupported": "set()", + "discount_sparsity": True, + "total_bops": 163991084.0, + "total_macs": 36961271.0, + "total_mem_w_bits": 1816066.0, + "total_mem_w_elems": 908033.0, + "total_mem_o_bits": 4563264.0, + "total_mem_o_elems": 142602.0, + } }, "expected_dense": { - "op_mac_SCALEDINT<8>_INT2": 1555200.0, - "mem_w_INT2": 1542848.0, - "mem_o_SCALEDINT<32>": 57600.0, - "op_mac_INT2_INT2": 57906176.0, - "mem_o_INT32": 85002.0, - "unsupported": "set()", - "discount_sparsity": False, - "total_bops": 256507904.0, - "total_macs": 59461376.0, - "total_mem_w_bits": 3085696.0, - "total_mem_w_elems": 1542848.0, - "total_mem_o_bits": 4563264.0, - "total_mem_o_elems": 142602.0, + "total_cost": { + "op_mac_SCALEDINT<8>_INT2": 1555200.0, + "mem_w_INT2": 1542848.0, + "mem_o_SCALEDINT<32>": 57600.0, + "op_mac_INT2_INT2": 57906176.0, + "mem_o_INT32": 85002.0, + "unsupported": "set()", + "discount_sparsity": False, + "total_bops": 256507904.0, + "total_macs": 59461376.0, + "total_mem_w_bits": 3085696.0, + "total_mem_w_elems": 1542848.0, + "total_mem_o_bits": 4563264.0, + "total_mem_o_elems": 142602.0, + } }, }, "FINN-TFC_W2A2": { "expected_sparse": { - "op_mac_INT2_INT2": 22355.0, - "mem_w_INT2": 22355.0, - "mem_o_INT32": 202.0, - "unsupported": "set()", - "discount_sparsity": True, - "total_bops": 89420.0, - "total_macs": 22355.0, - "total_mem_w_bits": 44710.0, - "total_mem_w_elems": 22355.0, - "total_mem_o_bits": 6464.0, - "total_mem_o_elems": 202.0, + "total_cost": { + "op_mac_INT2_INT2": 22355.0, + "mem_w_INT2": 22355.0, + "mem_o_INT32": 202.0, + "unsupported": "set()", + "discount_sparsity": True, + "total_bops": 89420.0, + "total_macs": 22355.0, + "total_mem_w_bits": 44710.0, + "total_mem_w_elems": 22355.0, + "total_mem_o_bits": 6464.0, + "total_mem_o_elems": 202.0, + } }, "expected_dense": { - "op_mac_INT2_INT2": 59008.0, - "mem_w_INT2": 59008.0, - "mem_o_INT32": 202.0, - "unsupported": "set()", - "discount_sparsity": False, - "total_bops": 236032.0, - "total_macs": 59008.0, - "total_mem_w_bits": 118016.0, - "total_mem_w_elems": 59008.0, - "total_mem_o_bits": 6464.0, - "total_mem_o_elems": 202.0, + "total_cost": { + "op_mac_INT2_INT2": 59008.0, + "mem_w_INT2": 59008.0, + "mem_o_INT32": 202.0, + "unsupported": "set()", + "discount_sparsity": False, + "total_bops": 236032.0, + "total_macs": 59008.0, + "total_mem_w_bits": 118016.0, + "total_mem_w_elems": 59008.0, + "total_mem_o_bits": 6464.0, + "total_mem_o_elems": 202.0, + } }, }, "RadioML_VGG10": { "expected_sparse": { - "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12620311.0, - "mem_w_SCALEDINT<8>": 155617.0, - "mem_o_SCALEDINT<32>": 130328.0, - "unsupported": "set()", - "discount_sparsity": True, - "total_bops": 807699904.0, - "total_macs": 12620311.0, - "total_mem_w_bits": 1244936.0, - "total_mem_w_elems": 155617.0, - "total_mem_o_bits": 4170496.0, - "total_mem_o_elems": 130328.0, + "total_cost": { + "unsupported": "set()", + "discount_sparsity": True, + "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12620311.0, + "mem_w_SCALEDINT<8>": 155617.0, + "mem_o_SCALEDINT<32>": 130328.0, + "total_bops": 807699904.0, + "total_macs": 12620311.0, + "total_mem_w_bits": 1244936.0, + "total_mem_w_elems": 155617.0, + "total_mem_o_bits": 4170496.0, + "total_mem_o_elems": 130328.0, + } }, "expected_dense": { - "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12864512.0, - "mem_w_SCALEDINT<8>": 159104.0, - "mem_o_SCALEDINT<32>": 130328.0, - "unsupported": "set()", - "discount_sparsity": False, - "total_bops": 823328768.0, - "total_macs": 12864512.0, - "total_mem_w_bits": 1272832.0, - "total_mem_w_elems": 159104.0, - "total_mem_o_bits": 4170496.0, - "total_mem_o_elems": 130328.0, + "total_cost": { + "unsupported": "set()", + "discount_sparsity": False, + "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12864512.0, + "mem_w_SCALEDINT<8>": 159104.0, + "mem_o_SCALEDINT<32>": 130328.0, + "total_bops": 823328768.0, + "total_macs": 12864512.0, + "total_mem_w_bits": 1272832.0, + "total_mem_w_elems": 159104.0, + "total_mem_o_bits": 4170496.0, + "total_mem_o_elems": 130328.0, + } }, }, } diff --git a/tests/analysis/test_matmul_mac_cost.py b/tests/analysis/test_matmul_mac_cost.py index fbdc0d2a..ff7dbc2f 100644 --- a/tests/analysis/test_matmul_mac_cost.py +++ b/tests/analysis/test_matmul_mac_cost.py @@ -40,6 +40,6 @@ def test_matmul_mac_cost(): cleaned_model = cleanup_model(model) # Two Matmul layers with shape (i_shape, w_shape, o_shape), # L1: ([4, 64, 32], [4, 32, 64], [4, 64, 64]) and L2: ([4, 64, 64], [4, 64, 32], [4, 64, 32]) - inf_cost_dict = infc.inference_cost(cleaned_model, discount_sparsity=False) + inf_cost_dict = infc.inference_cost(cleaned_model, discount_sparsity=False)["total_cost"] mac_cost = inf_cost_dict["op_mac_FLOAT32_FLOAT32"] # Expected mac cost 4*32*64*64 + 4*64*64*32 = 1048576 assert mac_cost == 1048576.0, "Error: discrepancy in mac cost." diff --git a/tests/transformation/test_pruning.py b/tests/transformation/test_pruning.py index 85f9afc9..b2fdbcd8 100644 --- a/tests/transformation/test_pruning.py +++ b/tests/transformation/test_pruning.py @@ -90,7 +90,7 @@ def test_pruning_mnv1(): # do cleanup including folding quantized weights model = cleanup_model(model, False) inp, golden = get_golden_in_and_output("MobileNetv1-w4a4") - cost0 = inference_cost(model, discount_sparsity=False) + cost0 = inference_cost(model, discount_sparsity=False)["total_cost"] assert cost0["op_mac_SCALEDINT<8>_SCALEDINT<8>"] == 10645344.0 assert cost0["mem_w_SCALEDINT<8>"] == 864.0 assert cost0["op_mac_SCALEDINT<4>_SCALEDINT<4>"] == 556357408.0 @@ -105,7 +105,7 @@ def test_pruning_mnv1(): } model = model.transform(PruneChannels(prune_spec)) - cost1 = inference_cost(model, discount_sparsity=False) + cost1 = inference_cost(model, discount_sparsity=False)["total_cost"] assert cost1["op_mac_SCALEDINT<8>_SCALEDINT<8>"] == 7318674.0 assert cost1["mem_w_SCALEDINT<8>"] == 594.0 assert cost1["op_mac_SCALEDINT<4>_SCALEDINT<4>"] == 546053216.0 diff --git a/tests/transformation/test_quantize_graph.py b/tests/transformation/test_quantize_graph.py index 867f9b34..5278194d 100644 --- a/tests/transformation/test_quantize_graph.py +++ b/tests/transformation/test_quantize_graph.py @@ -120,14 +120,14 @@ def to_verify(model, test_details): def test_quantize_graph(test_model): test_details = model_details[test_model] model = download_model(test_model, do_cleanup=True, return_modelwrapper=True) - original_model_inf_cost = inference_cost(model, discount_sparsity=False) + original_model_inf_cost = inference_cost(model, discount_sparsity=False)["total_cost"] nodes_pos = test_details["test_input"] model = model.transform(QuantizeGraph(nodes_pos)) quantnodes_added = len(model.get_nodes_by_op_type("Quant")) assert quantnodes_added == 10 # 10 positions are specified. verification = to_verify(model, nodes_pos) assert verification == "Success" - inf_cost = inference_cost(model, discount_sparsity=False) + inf_cost = inference_cost(model, discount_sparsity=False)["total_cost"] assert ( inf_cost["total_macs"] == original_model_inf_cost["total_macs"] ) # "1814073344.0" must be same as the original model.