From 52cfc4a2ac4c9feb729ad7acd2adbfb0e1a41207 Mon Sep 17 00:00:00 2001 From: Christoph Berganski Date: Wed, 13 Mar 2024 10:17:08 +0100 Subject: [PATCH 01/51] Fix clipping range issue in RoundAndClipThresholds transformation --- src/finn/transformation/streamline/round_thresholds.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/finn/transformation/streamline/round_thresholds.py b/src/finn/transformation/streamline/round_thresholds.py index 5ba5ee0ff5..2bf3630cff 100644 --- a/src/finn/transformation/streamline/round_thresholds.py +++ b/src/finn/transformation/streamline/round_thresholds.py @@ -57,10 +57,10 @@ def apply(self, model): model.set_tensor_datatype(n.input[1], idtype) graph_modified = True if idtype.is_integer() and ( - (Tnew < (idtype.min() - 1)).any() or (Tnew > (idtype.max() + 1)).any() + (Tnew < (idtype.min())).any() or (Tnew > (idtype.max())).any() ): # clip any large thresholds to input range + 1 - Tnew = np.clip(Tnew, idtype.min() - 1, idtype.max() + 1) + Tnew = np.clip(Tnew, idtype.min(), idtype.max()) model.set_initializer(n.input[1], Tnew) # use same datatype as inputs for thresholds model.set_tensor_datatype(n.input[1], idtype) From c8292e2a27bebb2254f278e409b00f448c35e600 Mon Sep 17 00:00:00 2001 From: Christoph Berganski Date: Sat, 6 Apr 2024 17:06:03 +0200 Subject: [PATCH 02/51] Rework RoundAndClipThresholds to avoid range and type promotion issues See https://github.com/Xilinx/finn/issues/978 --- .../streamline/round_thresholds.py | 105 +++++++++++++----- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/src/finn/transformation/streamline/round_thresholds.py b/src/finn/transformation/streamline/round_thresholds.py index 2bf3630cff..2666242730 100644 --- a/src/finn/transformation/streamline/round_thresholds.py +++ b/src/finn/transformation/streamline/round_thresholds.py @@ -26,43 +26,90 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# Need numpy for modifying the onnx graph tensors, which are numpy style arrays import numpy as np + +# QONNX wrapper of ONNX model graphs +from qonnx.core.modelwrapper import ModelWrapper + +# QONNX graph transformation base class from qonnx.transformation.base import Transformation +# Transformation running qonnx datatype inference +from qonnx.transformation.infer_datatypes import InferDataTypes + +# Rounds and clips thresholds to integer values if the node inputs are integer, +# respecting range, representability and data type (promotion) of the container +# data type class RoundAndClipThresholds(Transformation): """For MultiThreshold nodes operating on integer inputs, round up thresholds values to the nearest integer. Additionally, if the input - is unsigned, sets negative thresholds to zero.""" + is unsigned, sets negative thresholds to zero. Type-casts thresholds (back) + to the float32 container type (this is separate from the quantization + annotation). 
Runs InferDataTypes() afterward to propagate any changes to the + quantization data types.""" - def apply(self, model): + # Applies the transform to a whole model graph + def apply(self, model: ModelWrapper): # noqa + # Get the model graph out of the model wrapper object graph = model.graph + # Keep track of whether the graph has been modified graph_modified = False - for n in graph.node: - if n.op_type == "MultiThreshold": - idtype = model.get_tensor_datatype(n.input[0]) - T = model.get_initializer(n.input[1]) - Tnew = np.ceil(T) - if idtype.is_integer() and (T != Tnew).any(): - # round up the thresholds to nearest integer - model.set_initializer(n.input[1], Tnew) - # use same datatype as inputs for thresholds - model.set_tensor_datatype(n.input[1], idtype) - graph_modified = True - if idtype.is_integer() and not idtype.signed() and (Tnew < 0).any(): - # clip any negative thresholds if input is unsigned - Tnew = np.clip(Tnew, 0, None) - model.set_initializer(n.input[1], Tnew) - # use same datatype as inputs for thresholds - model.set_tensor_datatype(n.input[1], idtype) - graph_modified = True - if idtype.is_integer() and ( - (Tnew < (idtype.min())).any() or (Tnew > (idtype.max())).any() - ): - # clip any large thresholds to input range + 1 - Tnew = np.clip(Tnew, idtype.min(), idtype.max()) - model.set_initializer(n.input[1], Tnew) - # use same datatype as inputs for thresholds - model.set_tensor_datatype(n.input[1], idtype) + # Iterate all nodes in the graph keeping track of the index + for index, node in enumerate(graph.node): + # Applies to initializer tensors of MultiThreshold operations + if node.op_type == "MultiThreshold": + # Try to get the thresholds initializer tensor + thresholds = model.get_initializer(node.input[1]) + # There might be no constant thresholds stored as initializer + # tensor inside the model + if thresholds is None: + # Nothing we can do, skip to the next node + continue + # Get the data type of the inputs to this operation + dtype = model.get_tensor_datatype(node.input[0]) + # This transformation only applies to thresholding operations + # operating on integer inputs + if not dtype.is_integer(): + # Nothing we can do, skip to the next node + continue + # Round thresholds up to nearest integer and clip thresholds + # outside the input range + # Note: This might promote the thresholds to float64 and + # introduce extra inaccuracies due to large integers not being + # exactly representable in floating-point representation. + # See for example: np.ceil(np.float32(16777217)) == 16777216 + # fmt: off + new_thresholds = np.clip( + np.ceil(thresholds), dtype.min(), dtype.max() + ) + # fmt: on + # Convert back to the preferred float32 container type + # Note: np.clip might have promoted the thresholds to float64 + # TODO: Maybe consider an int64 container type for thresholds + # rounded to integer? Need to check all other transformations + # and code generation through the whole FINN and QONNX stack + # first, as these probably assume a float32 container type. 
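                # Illustrative example of the representability issue noted
                # above (added for clarity, not part of the original patch):
                # float32 has a 24-bit significand, so
                #   np.float32(2 ** 24 + 1) == np.float32(2 ** 24)
                # evaluates to True, and thresholds of 25 bits or more may
                # silently lose precision when cast back to float32 below.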
+ new_thresholds = new_thresholds.astype(np.float32) + # Insert the rounded and clipped thresholds back into the model + model.set_initializer(node.input[1], new_thresholds) + # The rounded and clipped thresholds now fit into the input data + # type + model.set_tensor_datatype(node.input[1], dtype) + # Test whether the new thresholds actually differ from the old + # ones + if np.any(new_thresholds != thresholds): + # Track the graph has been modified to inform the transform + # container to exhaustively repeat this transformation until + # no changes are possible graph_modified = True - return (model, graph_modified) + # Immediately exit here to propagate the data type changes + # before considering the next node + break + # Some data types might have changed, do one pass of data type inference + # to propagate these changes through the graph + model = model.transform(InferDataTypes()) + # Return the transformed model and indicate whether the graph actually + # has been transformed to exhaustively apply this transformation again. + return model, graph_modified From 3109645cb2a2bb764bd982948a36e2788756efc1 Mon Sep 17 00:00:00 2001 From: Christoph Berganski Date: Sat, 6 Apr 2024 17:10:36 +0200 Subject: [PATCH 03/51] [Tests] Rework test-cases for reworked RoundAndClipThresholds See https://github.com/Xilinx/finn/issues/978 --- .../streamline/test_round_thresholds.py | 257 ++++++++++++++++-- 1 file changed, 227 insertions(+), 30 deletions(-) diff --git a/tests/transformation/streamline/test_round_thresholds.py b/tests/transformation/streamline/test_round_thresholds.py index 85c60b37d5..63375598a0 100644 --- a/tests/transformation/streamline/test_round_thresholds.py +++ b/tests/transformation/streamline/test_round_thresholds.py @@ -26,45 +26,242 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# fmt: off +# Disable formatter. This is deliberately formatted to stay within 80 characters +# per line. Black, however, formats some lines going beyond this. + +# Testing framework import pytest +# Use numpy for python execution / computing the ground truth expected values import numpy as np + +# Utility types and function for creating onnx nodes and graphs from onnx import TensorProto, helper + +# QONNX data types like INT25 from qonnx.core.datatype import DataType + +# QONNX wrapper of ONNX model graphs from qonnx.core.modelwrapper import ModelWrapper -from qonnx.util.basic import qonnx_make_model +# Generate random tensors of QONNX/FINN data types for testing +from qonnx.util.basic import gen_finn_dt_tensor + +# Execution of onnx graphs within FINN import finn.core.onnx_exec as oxe + +# The transformation to be tested from finn.transformation.streamline import RoundAndClipThresholds -@pytest.mark.streamline -def test_round_thresholds(): - v = helper.make_tensor_value_info("v", TensorProto.FLOAT, [1, 4]) - thresholds = helper.make_tensor_value_info("thresholds", TensorProto.FLOAT, [4, 1]) - out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, 4]) - node_def = helper.make_node( - "MultiThreshold", ["v", "thresholds"], ["out"], domain="qonnx.custom_op.general" +# Tests the RoundAndClipThresholds transformation under various input, output +# data type combinations with purely integer inputs. Without proper rounding, +# this tests only the clipping, range and type-casting behavior of the +# transformation. 
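# For reference (a note added here, not part of the original test file):
# float32 represents every integer up to 2**24 exactly, so
#   DataType["INT25"] covers [-16777216, 16777215] and is still exact, while
#   DataType["INT26"] covers [-33554432, 33554431] and already contains
# integers such as 2**24 + 1 that float32 can only store approximately,
# which is the regime the parametrizations below probe from both sides.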
+@pytest.mark.parametrize("i_dtype", [
    # Explanation for selecting these test configurations:
    # 1. Below 24-bit thresholds we will not observe any interesting rounding
    # behavior, as all integers < 2^24 can be exactly represented in 32-bit
    # floating-point. Thus, we test thresholds at 25-bit signed integers and
    # generate test inputs slightly above and below this.
    # 2. We want to test out-of-range clipping of thresholds, in particular
    # clipping of the negative portion of signed thresholds. Thus, we only
    # generate signed thresholds, but test with signed and unsigned
    # inputs of smaller, larger and equal range.
    # 3. Testing proper floating-point thresholds requires a separate test-case
    "INT23", "UINT23", "INT24", "UINT24", "INT25", "UINT25", "INT26", "UINT26"
])
@pytest.mark.parametrize("o_dtype", [
    # Explanation for selecting these test configurations:
    # 1. Outputs of MultiThreshold are typically much smaller bit-width than the
    # inputs and thresholds.
    # 2. However, with randomly sampled thresholds from a rather large range due
    # to the selected input bit-widths (see above), we risk not adequately
    # covering the input range if we sample too few thresholds. The number of
    # thresholds sampled depends on the bit-width of the output, thus we use
    # rather high bit-width for testing.
    # 3. For a "real" model, the quantization procedure *should* take care of
    # adequately covering the true input range.
    "INT8", "UINT8"
])
@pytest.mark.parametrize("n_elems", [
    # Explanation for selecting these test configurations:
    # 1. Small edge cases and quickly running through tests: 1, 2, 3, 4
    # 2. Large test case 256, hopefully amplifying any rarely occurring errors
    1, 2, 3, 4, 256
])
def test_round_and_clip_thresholds_ints(i_dtype, o_dtype, n_elems):
    # Convert string representation of data type to onnx DataType
    i_dtype = DataType[i_dtype]
    t_dtype = DataType["INT25"]  # Note: Matches configuration above
    o_dtype = DataType[o_dtype]  # noqa: Duplicate model setup code
    # Create a dummy MultiThreshold operation to be tested
    node = helper.make_node(
        # Op-Type of the node
        "MultiThreshold",
        # MultiThreshold is implemented under the qonnx domain
        domain="qonnx.custom_op.general",
        # List the names of the input tensors
        inputs=["inp", "thresholds"],
        # List the names of the output tensors
        outputs=["out"],
        # The CustomOp needs to know the data type of the output to be produced
        out_dtype=str(o_dtype)
    )
    # Number of threshold values required to produce outputs of type o_dtype
    n_thresholds = o_dtype.get_num_possible_values() - 1
    # Create tensor value infos for all input/output tensors involved
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, n_elems])
    out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, n_elems])
    # Create a tensor value info for the thresholds parameter tensor
    # Note: Number of thresholds is determined by the output data type
    thresholds = helper.make_tensor_value_info(
        "thresholds", TensorProto.FLOAT, [n_elems, n_thresholds]
    )
    # Combine node and tensor value infos into an onnx graph
    graph = helper.make_graph([node], "thresholds", [inp, thresholds], [out])
    # Wrap the model graph in a ModelWrapper container
    model = ModelWrapper(helper.make_model(graph))
    # Sample random tensors of the configured input data type
    inp = gen_finn_dt_tensor(i_dtype, [1, n_elems])
    # Generate sorted thresholds for each of the input channels
    thresholds = 
np.sort(gen_finn_dt_tensor(t_dtype, [n_elems, n_thresholds]))
    # Set data type annotations for the input and thresholds tensor
    model.set_tensor_datatype("inp", i_dtype)  # noqa: Duplicate model execution
    model.set_tensor_datatype("thresholds", t_dtype)
    model.set_tensor_datatype("out", o_dtype)
    # Set the thresholds as initializer input to the model
    model.set_initializer("thresholds", thresholds)
    # Execute the model before running the RoundAndClipThresholds transformation
    out_expected = oxe.execute_onnx(model, {"inp": inp})["out"]
    # Before rounding the threshold data type must be as annotated
    assert model.get_tensor_datatype("thresholds") == t_dtype
    # Run the transformation to be tested
    model = model.transform(RoundAndClipThresholds())
    # After this transformation, the thresholds and output data type should be
    # inferred correctly
    assert model.get_tensor_datatype("thresholds") == i_dtype
    assert model.get_tensor_datatype("out") == o_dtype
    # After this transformation, the container type used to store the thresholds
    # values must be float32. No other type-cast or type promotion may happen.
    assert model.get_initializer("thresholds").dtype == np.float32
    # After rounding, all thresholds must be integers represented as float32
    assert all(
        x.is_integer() for x in model.get_initializer("thresholds").flatten()
    )
    # Execute the model after running the RoundAndClipThresholds transformation
    out_produced = oxe.execute_onnx(model, {"inp": inp})["out"]
    # Compare the results before and after: This is the pure integer test-case
    # and no actual rounding should happen, thus the rounded operation should
    # produce outputs exactly equal.
    assert np.all(out_produced == out_expected)


# Tests the RoundAndClipThresholds transformation under various input and output
# data type combinations with integer inputs but floating-point thresholds. In
# contrast to the test case above, this one exercises actual rounding.
@pytest.mark.parametrize("i_dtype", [
    # Explanation for selecting these test configurations:
    # 1. Below 24-bit thresholds we will not observe any interesting rounding
    # behavior, as all integers < 2^24 can be exactly represented in 32-bit
    # floating-point. Thus, we test thresholds at 25-bit signed integers and
    # generate test inputs slightly above and below this.
    # 2. We want to test out-of-range clipping of thresholds, in particular
    # clipping of the negative portion of signed thresholds. Thus, we only
    # generate signed thresholds, but test with signed and unsigned
    # inputs of smaller, larger and equal range.
    # 3. Testing proper floating-point thresholds requires a separate test-case
    "INT23", "UINT23", "INT24", "UINT24", "INT25", "UINT25", "INT26", "UINT26"
])
@pytest.mark.parametrize("o_dtype", [
    # Explanation for selecting these test configurations:
    # 1. Outputs of MultiThreshold are typically much smaller bit-width than the
    # inputs and thresholds.
    # 2. However, with randomly sampled thresholds from a rather large range due
    # to the selected input bit-widths (see above), we risk not adequately
    # covering the input range if we sample too few thresholds. The number of
    # thresholds sampled depends on the bit-width of the output, thus we use
    # rather high bit-width for testing.
    # 3. For a "real" model, the quantization procedure *should* take care of
    # adequately covering the true input range.
    "INT8", "UINT8"
])
@pytest.mark.parametrize("n_elems", [
    # Explanation for selecting these test configurations:
    # 1. 
Small edge cases and quickly running through tests: 1, 2, 3, 4 + # 2. Large test case 256, hopefully amplifying any rarely occurring errors + 1, 2, 3, 4, 256 +]) +def test_round_and_clip_thresholds_floats(i_dtype, o_dtype, n_elems): + # Convert string representation of data type to onnx DataType + i_dtype = DataType[i_dtype] + t_dtype = DataType["FLOAT32"] + o_dtype = DataType[o_dtype] # noqa: Duplicate model setup code + # Create a dummy MultiThreshold operation to be tested + node = helper.make_node( + # Op-Type of the node + "MultiThreshold", + # MultiThreshold is implemented under the qonnx domain + domain="qonnx.custom_op.general", + # List the names of the input tensors + inputs=["inp", "thresholds"], + # List the names of the output tensors + outputs=["out"], + # The CustomOp needs to know the data type of the output to be produced + out_dtype=str(o_dtype) + ) + # Number of threshold values required to produce outputs of type o_dtype + n_thresholds = o_dtype.get_num_possible_values() - 1 + # Create tensor value infos for all input/output tensors involved + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, n_elems]) + out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, n_elems]) + # Create a tensor value info for the thresholds parameter tensor + # Note: Number of thresholds is determined by the output data type + thresholds = helper.make_tensor_value_info( + "thresholds", TensorProto.FLOAT, [n_elems, n_thresholds] + ) + # Combine node and tensor value infos into an onnx graph + graph = helper.make_graph([node], "thresholds", [inp, thresholds], [out]) + # Wrap the model graph in a ModelWrapper container + model = ModelWrapper(helper.make_model(graph)) + # Sample random tensors of the configured input data type + inp = gen_finn_dt_tensor(i_dtype, [1, n_elems]) + # Draw uniformly random prototype thresholds in [0,+1] range + thresholds = np.random.rand(n_elems, n_thresholds) + # Type alias to 25-bit signed integer type used to set the range of the + # thresholds + INT25 = DataType["INT25"] # noqa: Variable name not lowercase + # Map the prototype thresholds into the test integer range and sort + thresholds = np.sort((INT25.max() - INT25.min()) * thresholds + INT25.min()) + # Set data type annotations for the input and thresholds tensor + model.set_tensor_datatype("inp", i_dtype) # noqa: Duplicate model execution + model.set_tensor_datatype("thresholds", t_dtype) + model.set_tensor_datatype("out", o_dtype) + # Set the thresholds as initializer input to the model + model.set_initializer("thresholds", thresholds) + # Execute the model before running the RoundAndClipThresholds transformation + out_expected = oxe.execute_onnx(model, {"inp": inp})["out"] + # Before rounding the threshold data type must be as annotated + assert model.get_tensor_datatype("thresholds") == t_dtype + # Run the transformation to be tested + model = model.transform(RoundAndClipThresholds()) + # After this transformation, the thresholds and output data type should be + # inferred correctly + assert model.get_tensor_datatype("thresholds") == i_dtype + assert model.get_tensor_datatype("out") == o_dtype + # After this transformation, the container type used to store the thresholds + # values must be float32. No other type-cast or type promotion may happen. 
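    # (A float64 dtype showing up at this point would be the tell-tale sign of
    # silent promotion by np.ceil/np.clip, which is the hypothetical failure
    # mode the following assertion guards against.)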
+    assert model.get_initializer("thresholds").dtype == np.float32
    # After rounding, all thresholds must be integers represented as float32
    assert all(
        x.is_integer() for x in model.get_initializer("thresholds").flatten()
    )
-    graph_def = helper.make_graph([node_def], "test_model", [v, thresholds], [out])
-    model_def = qonnx_make_model(graph_def)
-    model = ModelWrapper(model_def)
-    threshold_val = np.asarray([[-1.1], [0.7], [2.3], [5.1]], dtype=np.float32)
-    model.set_initializer("thresholds", threshold_val)
-    model.set_tensor_datatype("v", DataType["INT8"])
-    inp_dict_f = {"v": np.floor(threshold_val).T}
-    inp_dict_n = {"v": np.round(threshold_val).T}
-    inp_dict_c = {"v": np.ceil(threshold_val).T}
-    orig_f = oxe.execute_onnx(model, inp_dict_f)["out"]
-    orig_n = oxe.execute_onnx(model, inp_dict_n)["out"]
-    orig_c = oxe.execute_onnx(model, inp_dict_c)["out"]
-    assert model.get_tensor_datatype("thresholds") == DataType["FLOAT32"]
-    new_model = model.transform(RoundAndClipThresholds())
-    # rounded up thresholds should have same dtype as input
-    assert new_model.get_tensor_datatype("thresholds") == DataType["INT8"]
-    new_f = oxe.execute_onnx(new_model, inp_dict_f)["out"]
-    new_n = oxe.execute_onnx(new_model, inp_dict_n)["out"]
-    new_c = oxe.execute_onnx(new_model, inp_dict_c)["out"]
-    assert np.isclose(orig_f, new_f, atol=1e-3).all()
-    assert np.isclose(orig_n, new_n, atol=1e-3).all()
-    assert np.isclose(orig_c, new_c, atol=1e-3).all()
+    # Execute the model after running the RoundAndClipThresholds transformation
+    out_produced = oxe.execute_onnx(model, {"inp": inp})["out"]
+    # Compare the results before and after: This is the floating-point test with
+    # actual rounding, thus the transformed result may only be equal within some
+    # tolerance.
+    # Note: this has never been observed to be relevant in practice. For all test
+    # configurations, exact equality seems to hold, probably due to only integer
+    # inputs being tested.
+    assert np.allclose(out_produced, out_expected, atol=1.0e-3)

From 1b2665b7947cd4a1ded9459bdcf515de485fa518 Mon Sep 17 00:00:00 2001
From: lstasytis
Date: Wed, 17 Jul 2024 13:05:42 +0100
Subject: [PATCH 04/51] small typo fix

---
 notebooks/advanced/2_custom_op.ipynb                 | 2 +-
 notebooks/advanced/4_advanced_builder_settings.ipynb | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/notebooks/advanced/2_custom_op.ipynb b/notebooks/advanced/2_custom_op.ipynb
index bdd2976412..4c80c0263b 100644
--- a/notebooks/advanced/2_custom_op.ipynb
+++ b/notebooks/advanced/2_custom_op.ipynb
@@ -649,7 +649,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# run with FINN's execute_onnx, custom node will use c++ execution\n",
+    "# run with FINN's execute_onnx, custom node will use C++ execution\n",
     "new_op_inst.set_nodeattr(\"exec_mode\", \"c++\")\n",
     "ret = execute_onnx(mixedop_graph_new, inp_dict)\n",
     "ret"
    ]
diff --git a/notebooks/advanced/4_advanced_builder_settings.ipynb b/notebooks/advanced/4_advanced_builder_settings.ipynb
index 5139377342..e0c326d7d5 100644
--- a/notebooks/advanced/4_advanced_builder_settings.ipynb
+++ b/notebooks/advanced/4_advanced_builder_settings.ipynb
@@ -1278,7 +1278,7 @@
    "id": "f7012b9a",
    "metadata": {},
    "source": [
-    "In this section, we will have a peak into additional builder arguments the FINN compiler exposes. We will not be able to cover all but you will be able to have a look at a list and we encourage you to take your time to look into the different options there are to customize the FINN builder configuration."
+ "In this section, we will have a peek into additional builder arguments the FINN compiler exposes. We will not be able to cover all but you will be able to have a look at a list and we encourage you to take your time to look into the different options there are to customize the FINN builder configuration." ] }, { From 10fa30b3a47080310a749cbf7914fec3942b4e4e Mon Sep 17 00:00:00 2001 From: lstasytis Date: Wed, 17 Jul 2024 15:32:04 +0100 Subject: [PATCH 05/51] more typos and some rewording --- .../advanced/4_advanced_builder_settings.ipynb | 16 ++++++++-------- .../1-train-mlp-with-brevitas.ipynb | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/notebooks/advanced/4_advanced_builder_settings.ipynb b/notebooks/advanced/4_advanced_builder_settings.ipynb index e0c326d7d5..4a0f2bc695 100644 --- a/notebooks/advanced/4_advanced_builder_settings.ipynb +++ b/notebooks/advanced/4_advanced_builder_settings.ipynb @@ -199,7 +199,7 @@ "id": "d746eff3", "metadata": {}, "source": [ - "After each FINN builder step, the graph is saved as .onnx file. In the cell above we sort the intermediate models by time in descending order (`ls -t -r`) to visualize the builder flow. As you can see after the conversion to the FINN-ONNX format (`step_qonnx_to_finn`), the graph is prepared by tidy up and streamlining (`step_tidy_up` and `step_streamline`) and then the high level nodes are converted to HW abstraction layers (`step_convert_to_hw`). Then there is a partition created from all layers that were converted to HW layers (`step_create_dataflow_partition`), then we convert each of the HW abstraction layers into an HLS or RTL variant (`step_specialize_layers`). Afterwards optimizations are applied (`step_target_fps_parallelization`, `step_apply_folding_config` and `step_minimize_bit_width`). In the final step of this example we generate resource and performance reports for the network (`step_generate_estimate_reports`). Use the code below to investigate the network after each step." + "After each FINN builder step, the graph is saved as an .onnx file. In the cell above we sort the intermediate models by time in descending order (`ls -t -r`) to visualize the builder flow. As you can see after the conversion to the FINN-ONNX format (`step_qonnx_to_finn`), the graph is prepared by tidy up and streamlining (`step_tidy_up` and `step_streamline`) and then the high level nodes are converted to HW abstraction layers (`step_convert_to_hw`). Then there is a partition created from all layers that were converted to HW layers (`step_create_dataflow_partition`), then we convert each of the HW abstraction layers into an HLS or RTL variant (`step_specialize_layers`). Afterwards optimizations are applied (`step_target_fps_parallelization`, `step_apply_folding_config` and `step_minimize_bit_width`). In the final step of this example we generate resource and performance reports for the network (`step_generate_estimate_reports`). Use the code below to investigate the network after each step." ] }, { @@ -218,7 +218,7 @@ "id": "bccebd0d", "metadata": {}, "source": [ - "The analysis of these .onnx files can help us identifying points in the flow in which we might need to intervene and provide the compiler with additional information. When investigating the network after the conversion to HW layers, we can see that there are layers that were not converted. We can see this by clicking on the different nodes. HW layers have the module `finn.custom_op.fpgadataflow`." 
+ "The analysis of these .onnx files can help us identify points in the flow in which we might need to intervene and provide the compiler with additional information. When investigating the network after the conversion to HW layers, we can see that there are layers that were not converted. We can see this by clicking on the different nodes. HW layers have the module `finn.custom_op.fpgadataflow`." ] }, { @@ -361,7 +361,7 @@ "id": "2809f6a7", "metadata": {}, "source": [ - "Each steps gets the model (`model: ModelWrapper`) and the build configuration (`cfg: DataflowBuildConfig`) as input arguments. Then a certain sequence of transformations is applied to the model. In some of the steps, verification can be run to ensure that the applied transformations have not changed the behaviour of the network. In the end the modified model is returned." + "Each steps gets the model (`model: ModelWrapper`) and the build configuration (`cfg: DataflowBuildConfig`) as input arguments. Then a certain sequence of transformations is applied to the model. In some of the steps, verification can be run to ensure that the applied transformations have not changed the behaviour of the network. In the end, the modified model is returned." ] }, { @@ -993,7 +993,7 @@ "id": "fd1519fe", "metadata": {}, "source": [ - "In the following part of the tutorial, we will use the auto generated json file as starting point to create two new json files which explore the `ram_style` attribute. We will use one of the generated reports from the FINN builder to see the impact of these changes.\n", + "In the following part of the tutorial, we will use the auto generated json file as a starting point to create two new json files which explore the `ram_style` attribute. We will use one of the generated reports from the FINN builder to see the impact of these changes.\n", "For that, we will extract the total resources from the *estimate_layer_resources.json* report in the following cell." ] }, @@ -1254,7 +1254,7 @@ "id": "97f87780", "metadata": {}, "source": [ - "The initial implementation already had a high utilization of BRAM, but the estimations went now up to ~500 BRAMs while the LUT count went down to ~99k." + "The initial implementation already had a high utilization of BRAM, but the estimations now went up to ~500 BRAMs while the LUT count went down to ~99k." ] }, { @@ -1302,7 +1302,7 @@ "id": "308d52ba", "metadata": {}, "source": [ - "Earlier in the tutorial, we had a look at how build steps are written. When investigating the `step_tidy_up`, we can see that before the changed model is returned a verification step can be run. In the case of `step_tidy_up` it is the step `\"initial python\"` that can be initiated by setting `VerificationStepType.TIDY_UP_PYTHON`." + "Earlier in the tutorial, we had a look at how build steps are written. When investigating the `step_tidy_up`, we can see that before the changed model is returned, a verification step can be run. In the case of `step_tidy_up` it is the step `\"initial python\"` that can be initiated by setting `VerificationStepType.TIDY_UP_PYTHON`." ] }, { @@ -1536,7 +1536,7 @@ "source": [ "There are attributes that come from the dataclasses-json class: `to_dict`, `to_json`, `schema`, `from_json`, `from_dict`. This class is used for the implementation of the FINN builder. In this tutorial, we are mainly interested in the FINN specific arguments. \n", "\n", - "Some of these arguments we have seen already in the Cybersecurity notebook and in this notebook, e.g. 
`target_fps`, `fpga_part` and `folding_config_file`. In the code of the FINN builder, the function of each builder argument is documents, you can have a look [here](https://github.com/Xilinx/finn/blob/dev/src/finn/builder/build_dataflow_config.py#L155) and scroll through the available builder arguments." + "Some of these arguments we have seen already in the Cybersecurity notebook and in this notebook, e.g. `target_fps`, `fpga_part` and `folding_config_file`. In the code of the FINN builder, the function of each builder argument is documented, you can have a look [here](https://github.com/Xilinx/finn/blob/dev/src/finn/builder/build_dataflow_config.py#L155) and scroll through the available builder arguments." ] }, { @@ -1602,7 +1602,7 @@ "id": "c249f141", "metadata": {}, "source": [ - "This concludes the advanced builder settings tutorial. Below you can find code that can help you investigating more of the builder arguments and invoking the whole flow to generate a bitfile." + "This concludes the advanced builder settings tutorial. Below you can find code that can help you in investigating more of the builder arguments and invoking the whole flow to generate a bitfile." ] }, { diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb index da037050bb..e2bece5777 100644 --- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb +++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb @@ -194,7 +194,7 @@ "source": [ "# Define a PyTorch Device \n", "\n", - "GPUs can significantly speed-up training of deep neural networks. We check for availability of a GPU and if so define it as target device." + "GPUs can significantly speed-up training of deep neural networks. We check for availability of a GPU and if so define it as the target device." ] }, { From f723c0cf34a06239ee12736b815c8c4e01d45c00 Mon Sep 17 00:00:00 2001 From: lstasytis Date: Tue, 30 Jul 2024 09:52:14 +0100 Subject: [PATCH 06/51] updating cybersecurity example, first pass --- .../1-train-mlp-with-brevitas.ipynb | 4 +- .../2-import-into-finn-and-verify.ipynb | 97 ++++++++++++++++++- 2 files changed, 93 insertions(+), 8 deletions(-) diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb index e2bece5777..3f7a9b1070 100644 --- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb +++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb @@ -709,9 +709,7 @@ "\n", "# ModelWrapper\n", "model = ModelWrapper(ready_model_filename)\n", - "# Setting the input datatype explicitly because it doesn't get derived from the export function\n", - "model.set_tensor_datatype(model.graph.input[0].name, DataType[\"BIPOLAR\"])\n", - "model = model.transform(ConvertQONNXtoFINN())\n", + "\n", "model.save(ready_model_filename)\n", "\n", "print(\"Model saved to %s\" % ready_model_filename)" diff --git a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb index 33b64e11c0..d2fda2e830 100644 --- a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb +++ b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb @@ -51,9 +51,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 1. 
Import model into FINN with ModelWrapper \n", + "# 1. Import model into FINN with ModelWrapper and ConvertQONNXtoFINN \n", "\n", - "Now that we have the model in .onnx format, we can work with it using FINN. To import it into FINN, we'll use the [`ModelWrapper`](https://finn.readthedocs.io/en/latest/source_code/finn.core.html#qonnx.core.modelwrapper.ModelWrapper). It is a wrapper around the ONNX model which provides several helper functions to make it easier to work with the model." + "\n", + "## 1.1 Using ModelWrapper to load and observe a model\n", + "We first load the model which we prepared in the last notebook by using the\n", + "[`ModelWrapper`](https://finn.readthedocs.io/en/latest/source_code/finn.core.html#qonnx.core.modelwrapper.ModelWrapper). It is a wrapper around the ONNX model which provides several helper functions to make it easier to work with the model." ] }, { @@ -64,17 +67,23 @@ "source": [ "import os\n", "from qonnx.core.modelwrapper import ModelWrapper\n", + "from qonnx.core.datatype import DataType\n", + "from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN\n", "\n", "model_dir = os.environ['FINN_ROOT'] + \"/notebooks/end2end_example/cybersecurity\"\n", "ready_model_filename = model_dir + \"/cybsec-mlp-ready.onnx\"\n", - "model_for_sim = ModelWrapper(ready_model_filename)" + "\n", + "# ModelWrapper\n", + "model_for_sim = ModelWrapper(ready_model_filename)\n", + "\n", + "print(\"Model loaded from %s\" % ready_model_filename)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's have a look at some of the member functions exposed by `ModelWrapper` to see what kind of information we can extract from it." + "Now that we have the model in .onnx format, we can look at some of the operations that were introduced, however we cannot use it in FINN just yet. To import it into FINN, we will need to use the ConvertQONNXtoFINN transformation. But before that, let us use some of the member functions exposed by `ModelWrapper` to see what kind of information we can extract from it and have a baseline to compare to when we do call the ConvertQONNXtoFINN transformation." ] }, { @@ -121,7 +130,85 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note that the output tensor is (as of yet) marked as a float32 value, even though we know the output is binary. This will be automatically inferred by the compiler in the next step when we run the `InferDataTypes` transformation." + "Note that the output tensor is (as of yet) marked as a float32 value, even though we know the output is binary. This will get resolved when we call the `ConvertQONNXtoFINN` transformation, which internally features an `Infer_Data_Types` transformation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.2 Converting to from QONNX to FINN using ConvertQONNXtoFINN\n", + "\n", + "At this point, we would like to move from the QONNX intermediate representation (IR) onto the FINN IR. We can do this by using the ConvertQONNXtoFINN() function from FINN on a QONNX model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Setting the input datatype explicitly because it doesn't get derived from the export function\n", + "model_for_sim.set_tensor_datatype(model_for_sim.graph.input[0].name, DataType[\"BIPOLAR\"])\n", + "\n", + "# Calling the actual QONNX -> FINN transformation\n", + "model_for_sim = model_for_sim.transform(ConvertQONNXtoFINN())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can look at the tensor datatypes and operator types again to see how they have changed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from qonnx.core.datatype import DataType\n", + "\n", + "finnonnx_in_tensor_name = model_for_sim.graph.input[0].name\n", + "finnonnx_out_tensor_name = model_for_sim.graph.output[0].name\n", + "print(\"Input tensor name: %s\" % finnonnx_in_tensor_name)\n", + "print(\"Output tensor name: %s\" % finnonnx_out_tensor_name)\n", + "finnonnx_model_in_shape = model_for_sim.get_tensor_shape(finnonnx_in_tensor_name)\n", + "finnonnx_model_out_shape = model_for_sim.get_tensor_shape(finnonnx_out_tensor_name)\n", + "print(\"Input tensor shape: %s\" % str(finnonnx_model_in_shape))\n", + "print(\"Output tensor shape: %s\" % str(finnonnx_model_out_shape))\n", + "finnonnx_model_in_dt = model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)\n", + "finnonnx_model_out_dt = model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)\n", + "print(\"Input tensor datatype: %s\" % str(finnonnx_model_in_dt.name))\n", + "print(\"Output tensor datatype: %s\" % str(finnonnx_model_out_dt.name))\n", + "print(\"List of node operator types in the graph: \")\n", + "print([x.op_type for x in model_for_sim.graph.node])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that the input and output tensor datatypes now correctly show `BIPOLAR` while the operator types have also heavily changed compared to the QONNX version. This is because in FINN, we use operators more suitable for FPGA implementations. `ConvertQONNXtoFINN` internally called many transformations which change the operators in such a manner and we can actually peek at the source code to see them using the `showSrc` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from finn.util.visualization import showSrc\n", + "showSrc(ConvertQONNXtoFINN.apply)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see, `ConvertQONNXtoFINN` turned GEMM operation into MatMuls, turned ReLU nodes into Thresholding nodes and so forth. However, these nodes do need further transformations before they can be turned into FPGA operators, which we handle in the next step." 
] }, { From e19f8b6715cc9871c465c4f8b098a1cdf55bf272 Mon Sep 17 00:00:00 2001 From: lstasytis Date: Tue, 6 Aug 2024 16:20:10 +0100 Subject: [PATCH 07/51] ONNX to QONNX and similar changes, updated text --- .../1-train-mlp-with-brevitas.ipynb | 28 +++-- .../2-import-into-finn-and-verify.ipynb | 111 ++++++++++-------- 2 files changed, 74 insertions(+), 65 deletions(-) diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb index 3f7a9b1070..73bb009e2d 100644 --- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb +++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb @@ -672,7 +672,7 @@ "\n", "[ONNX](https://onnx.ai/) is an open format built to represent machine learning models, and the FINN compiler expects an ONNX model as input. We'll now export our network into ONNX to be imported and used in FINN for the next notebooks. Note that the particular ONNX representation used for FINN differs from standard ONNX, you can read more about this [here](https://finn.readthedocs.io/en/latest/internals.html#intermediate-representation-finn-onnx).\n", "\n", - "You can see below how we export a trained network in Brevitas into a FINN-compatible ONNX representation (QONNX). QONNX is the format we can export from Brevitas, to feed it into the FINN compiler, we will need to make a conversion to the FINN-ONNX format which is the intermediate representation the compiler works on. The conversion of the FINN-ONNX format is a FINN compiler transformation and to be able to apply it to our model, we will need to wrap it into [ModelWrapper](https://finn.readthedocs.io/en/latest/internals.html#modelwrapper). This is a wrapper around the ONNX model which provides several helper functions to make it easier to work with the model. Then we can call the conversion function to obtain the model in FINN-ONNX format." + "You can see below how we export a trained network in Brevitas into a FINN-compatible ONNX representation (QONNX). QONNX is the format we can export from Brevitas, to feed it into the FINN compiler, we will need to make a conversion to the FINN-ONNX format which is the intermediate representation the compiler works on. The conversion of the FINN-ONNX format is a FINN compiler transformation and to be able to apply it to our model, we will need to wrap it into [ModelWrapper](https://finn.readthedocs.io/en/latest/internals.html#modelwrapper). This is a wrapper around the ONNX model which provides several helper functions to make it easier to work with the model. Then we can call the conversion function to obtain the model in FINN-ONNX format. This will be done in the next notebook. For now, we simply export and save the QONNX model." ] }, { @@ -707,11 +707,6 @@ "# clean-up\n", "qonnx_cleanup(ready_model_filename, out_file=ready_model_filename)\n", "\n", - "# ModelWrapper\n", - "model = ModelWrapper(ready_model_filename)\n", - "\n", - "model.save(ready_model_filename)\n", - "\n", "print(\"Model saved to %s\" % ready_model_filename)" ] }, @@ -719,16 +714,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## View the Exported ONNX in Netron\n", + "## View the Exported QONNX in Netron\n", "\n", - "Let's examine the exported ONNX model with [Netron](https://github.com/lutzroeder/netron), which is a visualizer for neural networks and allows interactive investigation of network properties. 
For example, you can click on the individual nodes and view the properties. Particular things of note:\n", + "Let's examine the exported QONNX model with [Netron](https://github.com/lutzroeder/netron), which is a visualizer for neural networks and allows interactive investigation of network properties. For example, you can click on the individual nodes and view the properties. Particular things of note:\n", "\n", - "* The input tensor \"0\" is annotated with `quantization: finn_datatype: BIPOLAR`\n", "* The input preprocessing (x + 1) / 2 is exported as part of the network (initial `Add` and `Div` layers)\n", - "* Brevitas `QuantLinear` layers are exported to ONNX as `MatMul`. We've exported the padded version; shape of the first MatMul node's weight parameter is 600x64\n", - "* The weight parameters (second inputs) for MatMul nodes are annotated with `quantization: finn_datatype: INT2`\n", - "* The quantized activations are exported as `MultiThreshold` nodes with `domain=qonnx.custom_op.general`\n", - "* There's a final `MultiThreshold` node with threshold=0 to produce the final bipolar output (this is the `qnt_output` from `CybSecMLPForExport`" + "* Brevitas `QuantLinear` layers are exported to QONNX as `Gemm`. We've exported the padded version; shape of the first `Gemm` node's weight parameter is 600x64\n", + "* The quantized activations are exported as `Quant` nodes with `domain=qonnx.custom_op.general`\n", + "* The weight parameters (second inputs) for the `Gemm` node can also be viewed by opening up the producer `Quant` node, scrolling down to the `Inputs` section and pressing the plus sign to the right of the first input parameter. For the first `Quant` node, this would be the parameter named `Quant_0_param0`\n", + "* The bitwidth of the weights are also shown as the 4th value in the `Quant` node, (3=2) meaning that we quantize to 2 bits total.\n", + "* There's a final `BipolarQuant` node with a single input and output value to produce the final bipolar output (this is the `qnt_output` from `CybSecMLPForExport`)" ] }, { @@ -749,6 +744,13 @@ "## That's it! \n", "You created, trained and tested a quantized MLP that is ready to be loaded into FINN, congratulations! You can now proceed to the next notebook." ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb index d2fda2e830..522a25f5c7 100644 --- a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb +++ b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb @@ -12,7 +12,7 @@ "\n", "**Also remember to 'close and halt' any other FINN notebooks, since Netron visualizations use the same port.**\n", "\n", - "In this notebook we will show how to import the network we trained in Brevitas and verify it in the FINN compiler. \n", + "In this notebook we will show how to import the network we trained in Brevitas, convert it from the QONNX format to FINN-ONNX, going over the differences and, lastly, verify it in the FINN compiler. 
\n", "This verification process can actually be done at various stages in the compiler [as explained in this notebook](../bnn-pynq/tfc_end2end_verification.ipynb) but for this example we'll only consider the first step: verifying the exported high-level FINN-ONNX model.\n", "Another goal of this notebook is to introduce you to the concept of *graph transformations* -- we'll be applying some transformations to the graph to make it executable for verification. \n", "Once this model is sucessfully verified, we'll generate an FPGA accelerator from it in the next notebook." @@ -41,7 +41,7 @@ "source": [ "## Outline\n", "-------------\n", - "1. [Import model into FINN with ModelWrapper](#brevitas_import_visualization)\n", + "1. [Convert model from QONNX to FINN-ONNX](#brevitas_import_visualization)\n", "2. [Network preparations: Tidy-up transformations](#network_preparations)\n", "3. [Load the dataset and Brevitas model](#load_dataset) \n", "4. [Compare FINN and Brevitas execution](#compare_brevitas)" @@ -51,7 +51,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 1. Import model into FINN with ModelWrapper and ConvertQONNXtoFINN \n", + "# 1. Convert model from QONNX to FINN-ONNX \n", + "\n", + "\n", + "To answer the question of why we need a conversion from QONNX to FINN-ONNX in the first place, it is important to note key differences between the three representations: ONNX, QONNX and FINN-ONNX.\n", + "\n", + "Currently, ONNX provides only limited support for quantizing data types, while QONNX and FINN-ONNX provide fully flexible quantization support. However the way in which they do differs: QONNX provides special node types called `Quant` which ingest weights or previous node output streams as inputs to produce quantized output streams. However, this node is not designed with dataflow architectures in mind, with each node instance only performing the quantization on one individual input stream. Meanwhile, FINN-ONNX has a special node type called `Thresholding`, which was designed with dataflow graph models in mind. Beyond, this, there are other node types which differ in FINN-ONNX as opposed to QONNX. Thus we need a conversion function, which we will explore in more detail shortly.\n", + "\n", + "Lastly, we want to emphasize that we use the uppercase naming (ONNX, QONNX, FINN-ONNX) for the intermediate representations (IR), while the lower case naming (onnx, qonnx, finn) are used to refer to the compiler toolchains themselves.\n", "\n", "\n", "## 1.1 Using ModelWrapper to load and observe a model\n", @@ -74,7 +81,7 @@ "ready_model_filename = model_dir + \"/cybsec-mlp-ready.onnx\"\n", "\n", "# ModelWrapper\n", - "model_for_sim = ModelWrapper(ready_model_filename)\n", + "model = ModelWrapper(ready_model_filename)\n", "\n", "print(\"Model loaded from %s\" % ready_model_filename)" ] @@ -83,7 +90,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now that we have the model in .onnx format, we can look at some of the operations that were introduced, however we cannot use it in FINN just yet. To import it into FINN, we will need to use the ConvertQONNXtoFINN transformation. But before that, let us use some of the member functions exposed by `ModelWrapper` to see what kind of information we can extract from it and have a baseline to compare to when we do call the ConvertQONNXtoFINN transformation." + "To import the model into FINN, we will need to use the `ConvertQONNXtoFINN` transformation. 
But before that, let us use some of the member functions exposed by `ModelWrapper` to see what kind of information we can extract from it and have a baseline to compare to when we do call the `ConvertQONNXtoFINN` transformation." ] }, { @@ -92,14 +99,14 @@ "metadata": {}, "outputs": [], "source": [ - "dir(model_for_sim)" + "dir(model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Many of these helper functions relate to extracting information about the structure and properties of the ONNX model. You can find out more about examining and manipulating ONNX models programmatically in [this tutorial](../../basics/0_how_to_work_with_onnx.ipynb), but we'll show a few basic functions here. For instance, we can extract the shape and datatype annotation for various tensors in the graph, as well as information related to the operation types associated with each node." + "Many of these helper functions relate to extracting information about the structure and properties of the ONNX model. You can find out more about examining and manipulating ONNX models programmatically in [this tutorial](../../basics/0_how_to_work_with_onnx.ipynb), but we'll show a few basic functions here. For instance, we can extract the shape and datatype annotation for various tensors in the graph, as well as information related to the operation types associated with each node. We will do this now." ] }, { @@ -110,27 +117,27 @@ "source": [ "from qonnx.core.datatype import DataType\n", "\n", - "finnonnx_in_tensor_name = model_for_sim.graph.input[0].name\n", - "finnonnx_out_tensor_name = model_for_sim.graph.output[0].name\n", - "print(\"Input tensor name: %s\" % finnonnx_in_tensor_name)\n", - "print(\"Output tensor name: %s\" % finnonnx_out_tensor_name)\n", - "finnonnx_model_in_shape = model_for_sim.get_tensor_shape(finnonnx_in_tensor_name)\n", - "finnonnx_model_out_shape = model_for_sim.get_tensor_shape(finnonnx_out_tensor_name)\n", - "print(\"Input tensor shape: %s\" % str(finnonnx_model_in_shape))\n", - "print(\"Output tensor shape: %s\" % str(finnonnx_model_out_shape))\n", - "finnonnx_model_in_dt = model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)\n", - "finnonnx_model_out_dt = model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)\n", - "print(\"Input tensor datatype: %s\" % str(finnonnx_model_in_dt.name))\n", - "print(\"Output tensor datatype: %s\" % str(finnonnx_model_out_dt.name))\n", + "in_tensor_name = model.graph.input[0].name\n", + "out_tensor_name = model.graph.output[0].name\n", + "print(\"Input tensor name: %s\" % in_tensor_name)\n", + "print(\"Output tensor name: %s\" % out_tensor_name)\n", + "model_in_shape = model.get_tensor_shape(in_tensor_name)\n", + "model_out_shape = model.get_tensor_shape(out_tensor_name)\n", + "print(\"Input tensor shape: %s\" % str(model_in_shape))\n", + "print(\"Output tensor shape: %s\" % str(model_out_shape))\n", + "model_in_dt = model.get_tensor_datatype(in_tensor_name)\n", + "model_out_dt = model.get_tensor_datatype(out_tensor_name)\n", + "print(\"Input tensor datatype: %s\" % str(model_in_dt.name))\n", + "print(\"Output tensor datatype: %s\" % str(model_out_dt.name))\n", "print(\"List of node operator types in the graph: \")\n", - "print([x.op_type for x in model_for_sim.graph.node])" + "print([x.op_type for x in model.graph.node])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Note that the output tensor is (as of yet) marked as a float32 value, even though we know the output is binary. 
This will get resolved when we call the `ConvertQONNXtoFINN` transformation, which internally features an `Infer_Data_Types` transformation."
+    "Note that the input and output tensors are (as of yet) marked as float32 values, even though we know they are binary. The output datatype will get inferred when we call the `ConvertQONNXtoFINN` transformation, which internally features an `InferDataTypes` transformation, while the input we will adjust manually with the `set_tensor_datatype` function."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## 1.2 Converting from QONNX to FINN using ConvertQONNXtoFINN\n",
     "\n",
-    "At this point, we would like to move from the QONNX intermediate representation (IR) onto the FINN IR. We can do this by using the ConvertQONNXtoFINN() function from FINN on a QONNX model."
+    "At this point, we would like to move from the QONNX IR onto the FINN-ONNX IR. We can do this by using the `ConvertQONNXtoFINN()` function on a QONNX model."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "\n",
     "# Setting the input datatype explicitly because it doesn't get derived from the export function\n",
     "model.set_tensor_datatype(model.graph.input[0].name, DataType[\"BIPOLAR\"])\n",
     "\n",
     "# Calling the actual QONNX -> FINN transformation\n",
     "model = model.transform(ConvertQONNXtoFINN())"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "We can look at the tensor datatypes and operator types again to see how they have changed."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "from qonnx.core.datatype import DataType\n",
     "\n",
     "in_tensor_name = model.graph.input[0].name\n",
     "out_tensor_name = model.graph.output[0].name\n",
     "print(\"Input tensor name: %s\" % in_tensor_name)\n",
     "print(\"Output tensor name: %s\" % out_tensor_name)\n",
     "model_in_shape = model.get_tensor_shape(in_tensor_name)\n",
     "model_out_shape = model.get_tensor_shape(out_tensor_name)\n",
     "print(\"Input tensor shape: %s\" % str(model_in_shape))\n",
     "print(\"Output tensor shape: %s\" % str(model_out_shape))\n",
     "model_in_dt = model.get_tensor_datatype(in_tensor_name)\n",
     "model_out_dt = model.get_tensor_datatype(out_tensor_name)\n",
     "print(\"Input tensor datatype: %s\" % str(model_in_dt.name))\n",
     "print(\"Output tensor datatype: %s\" % str(model_out_dt.name))\n",
     "print(\"List of node operator types in the graph: \")\n",
     "print([x.op_type for x in model.graph.node])"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "As we can see, `ConvertQONNXtoFINN` turned GEMM operation into 
MatMuls, turned ReLU nodes into Thresholding nodes and so forth. However, these nodes do need further transformations before they can be turned into FPGA operators, which we handle in the next step." + "As we can see, `ConvertQONNXtoFINN` turned `Gemm` operation into `MatMuls` using the `GemmToMatMul()` transform and turned `Quant` nodes into `Thresholding` nodes using the `ConvertQuantActToMultiThreshold()` transform to name a few. However, these nodes do need further transformations before they can be turned into FPGA operators, which we handle in the next step." ] }, { @@ -233,15 +240,15 @@ "from qonnx.transformation.infer_datatypes import InferDataTypes\n", "from qonnx.transformation.fold_constants import FoldConstants\n", "\n", - "model_for_sim = model_for_sim.transform(InferShapes())\n", - "model_for_sim = model_for_sim.transform(FoldConstants())\n", - "model_for_sim = model_for_sim.transform(GiveUniqueNodeNames())\n", - "model_for_sim = model_for_sim.transform(GiveReadableTensorNames())\n", - "model_for_sim = model_for_sim.transform(InferDataTypes())\n", - "model_for_sim = model_for_sim.transform(RemoveStaticGraphInputs())\n", + "model = model.transform(InferShapes())\n", + "model = model.transform(FoldConstants())\n", + "model = model.transform(GiveUniqueNodeNames())\n", + "model = model.transform(GiveReadableTensorNames())\n", + "model = model.transform(InferDataTypes())\n", + "model = model.transform(RemoveStaticGraphInputs())\n", "\n", "verif_model_filename = model_dir + \"/cybsec-mlp-verification.onnx\"\n", - "model_for_sim.save(verif_model_filename)" + "model.save(verif_model_filename)" ] }, { @@ -396,22 +403,22 @@ "import finn.core.onnx_exec as oxe\n", "\n", "def inference_with_finn_onnx(current_inp):\n", - " finnonnx_in_tensor_name = model_for_sim.graph.input[0].name\n", - " finnonnx_model_in_shape = model_for_sim.get_tensor_shape(finnonnx_in_tensor_name)\n", - " finnonnx_out_tensor_name = model_for_sim.graph.output[0].name\n", + " in_tensor_name = model.graph.input[0].name\n", + " model_in_shape = model.get_tensor_shape(in_tensor_name)\n", + " out_tensor_name = model.graph.output[0].name\n", " # convert input to numpy for FINN\n", " current_inp = current_inp.detach().numpy()\n", " # add padding and re-scale to bipolar\n", " current_inp = np.pad(current_inp, [(0, 0), (0, 7)])\n", " current_inp = 2*current_inp-1\n", " # reshape to expected input (add 1 for batch dimension)\n", - " current_inp = current_inp.reshape(finnonnx_model_in_shape)\n", + " current_inp = current_inp.reshape(model_in_shape)\n", " # create the input dictionary\n", - " input_dict = {finnonnx_in_tensor_name : current_inp} \n", + " input_dict = {in_tensor_name : current_inp} \n", " # run with FINN's execute_onnx\n", - " output_dict = oxe.execute_onnx(model_for_sim, input_dict)\n", + " output_dict = oxe.execute_onnx(model, input_dict)\n", " #get the output tensor\n", - " finn_output = output_dict[finnonnx_out_tensor_name] \n", + " finn_output = output_dict[out_tensor_name] \n", " return finn_output" ] }, From 84cbc0cbbadf1633033a4079b623114cfc459fee Mon Sep 17 00:00:00 2001 From: Christoph Berganski Date: Wed, 7 Aug 2024 11:34:05 +0200 Subject: [PATCH 08/51] [ConvolutionInputGenerator] Make infer_node_datatype update attributes Without updating the datatype attributes of the node, there might be a mismatch between tensor annotations (the actual datatype) and the type assumed by the node. This becomes an issue for example when querying the bit-width of the stream when inserting data-width converters. 
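To make the failure mode concrete, the mismatch can be detected with a minimal standalone sketch along the following lines (illustrative only, not part of the diff below; `model` and `node` are assumed to come from a ModelWrapper graph containing a ConvolutionInputGenerator):

    from qonnx.core.datatype import DataType
    from qonnx.custom_op.registry import getCustomOp

    def swg_dtype_mismatch(model, node):
        # datatype recorded in the node attributes (potentially stale)
        inst = getCustomOp(node)
        attr_dt = DataType[inst.get_nodeattr("inputDataType")]
        # datatype currently annotated on the input tensor
        anno_dt = model.get_tensor_datatype(node.input[0])
        # data-width converter insertion sizes streams via the node, so a
        # disagreement here can produce wrongly sized converters
        return attr_dt.bitwidth() != anno_dt.bitwidth()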
--- .../fpgadataflow/convolutioninputgenerator.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index 96f49069c7..1fb4940fb4 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np +import warnings from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper @@ -141,6 +142,27 @@ def infer_node_datatype(self, model): node = self.onnx_node # data type stays the same dtype = model.get_tensor_datatype(node.input[0]) + + # Test for changing input datatype + if dtype != self.get_nodeattr("inputDataType"): + # Issue a warning message + warnings.warn( + f"{node.name}: inputDataType changing from" + f" {self.get_nodeattr('inputDataType')} to {dtype}" + ) + # Set the new datatype attribute + self.set_nodeattr("inputDataType", dtype.name) + + # Test for changing output datatype + if dtype != self.get_nodeattr("outputDataType"): + # Issue a warning message + warnings.warn( + f"{node.name}: outputDataType changing from" + f" {self.get_nodeattr('outputDataType')} to {dtype}" + ) + # Set the new datatype attribute + self.set_nodeattr("outputDataType", dtype.name) + # Propagate the datatype through the model graph model.set_tensor_datatype(node.output[0], dtype) def verify_node(self): From a78f23b2d368cf94d32d5084f34f81affd44f516 Mon Sep 17 00:00:00 2001 From: Hannah Yan Date: Wed, 7 Aug 2024 13:59:28 +0100 Subject: [PATCH 09/51] Increased liveness threshold for verify_step_stitched_ip_rtlsim --- src/finn/builder/build_dataflow_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index b8ed8daec7..bdbcc53d83 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -666,7 +666,7 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): estimate_network_performance = verify_model.analysis(dataflow_performance) prev_liveness = pyverilate_get_liveness_threshold_cycles() os.environ["LIVENESS_THRESHOLD"] = str( - int(estimate_network_performance["critical_path_cycles"]) + int(estimate_network_performance["critical_path_cycles"] * 1.1) ) if cfg.verify_save_rtlsim_waveforms: report_dir = cfg.output_dir + "/report" From 6e4115347a1ca3bc20058b26381c8c08b3c46284 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 8 Aug 2024 09:39:19 +0100 Subject: [PATCH 10/51] [NBs] Update text for cybersecurity notebooks --- .../1-train-mlp-with-brevitas.ipynb | 13 +++-------- .../2-import-into-finn-and-verify.ipynb | 22 +++++++++++++------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb index 73bb009e2d..3f8d65497b 100644 --- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb +++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb @@ -53,7 +53,7 @@ " * [(Option 1) Train the Model from Scratch](#train_scratch)\n", " * [(Option 2) Load Pre-Trained Parameters](#load_pretrained)\n", "* [Network Surgery Before Export](#network_surgery)\n", - "* [Export to QONNX 
and Conversion to FINN-ONNX](#export_qonnx)" + "* [Export to QONNX](#export_qonnx)" ] }, { @@ -667,12 +667,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Export to QONNX and Conversion to FINN-ONNX \n", + "# Export to QONNX \n", "\n", "\n", "[ONNX](https://onnx.ai/) is an open format built to represent machine learning models, and the FINN compiler expects an ONNX model as input. We'll now export our network into ONNX to be imported and used in FINN for the next notebooks. Note that the particular ONNX representation used for FINN differs from standard ONNX, you can read more about this [here](https://finn.readthedocs.io/en/latest/internals.html#intermediate-representation-finn-onnx).\n", "\n", - "You can see below how we export a trained network in Brevitas into a FINN-compatible ONNX representation (QONNX). QONNX is the format we can export from Brevitas, to feed it into the FINN compiler, we will need to make a conversion to the FINN-ONNX format which is the intermediate representation the compiler works on. The conversion of the FINN-ONNX format is a FINN compiler transformation and to be able to apply it to our model, we will need to wrap it into [ModelWrapper](https://finn.readthedocs.io/en/latest/internals.html#modelwrapper). This is a wrapper around the ONNX model which provides several helper functions to make it easier to work with the model. Then we can call the conversion function to obtain the model in FINN-ONNX format. This will be done in the next notebook. For now, we simply export and save the QONNX model." + "You can see below how we export a trained network in Brevitas into a FINN-compatible ONNX representation (QONNX). QONNX is the format we can export from Brevitas, to feed it into the FINN compiler, we will need to make a conversion to the FINN-ONNX format which is the intermediate representation the compiler works on. This will be done in the next notebook. For now, we simply export and save the QONNX model." ] }, { @@ -744,13 +744,6 @@ "## That's it! \n", "You created, trained and tested a quantized MLP that is ready to be loaded into FINN, congratulations! You can now proceed to the next notebook." ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb index 522a25f5c7..70f1acae0a 100644 --- a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb +++ b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb @@ -54,11 +54,13 @@ "# 1. Convert model from QONNX to FINN-ONNX \n", "\n", "\n", - "To answer the question of why we need a conversion from QONNX to FINN-ONNX in the first place, it is important to note key differences between the three representations: ONNX, QONNX and FINN-ONNX.\n", + "Even though the input to finn is the QONNX format, an IR called FINN-ONNX is used inside the compiler. In this part of the notebook, we show how to convert QONNX to FINN-ONNX and explain the key differences between the three representations: ONNX, QONNX and FINN-ONNX.\n", "\n", - "Currently, ONNX provides only limited support for quantizing data types, while QONNX and FINN-ONNX provide fully flexible quantization support. 
However the way in which they do differs: QONNX provides special node types called `Quant` which ingest weights or previous node output streams as inputs to produce quantized output streams. However, this node is not designed with dataflow architectures in mind, with each node instance only performing the quantization on one individual input stream. Meanwhile, FINN-ONNX has a special node type called `Thresholding`, which was designed with dataflow graph models in mind. Beyond, this, there are other node types which differ in FINN-ONNX as opposed to QONNX. Thus we need a conversion function, which we will explore in more detail shortly.\n",
+    "QONNX and FINN-ONNX are extensions to the standard ONNX format. Currently, ONNX provides only limited support for expressing quantization, while QONNX and FINN-ONNX provide fully flexible quantization support. However, the way in which they do so differs: QONNX provides special node types called `Quant` which ingest weights or previous node output streams as inputs to produce quantized output streams. Meanwhile, FINN-ONNX uses tensor annotation to express quantization and has a special node type called `MultiThreshold`, which implements quantization on the activation data path.\n",
     "\n",
+    "Beyond this, there are other node types which differ in FINN-ONNX as opposed to QONNX. Thus we need a conversion function, which we will explore in more detail shortly.\n",
     "\n",
     "Lastly, we want to emphasize that we use the uppercase naming (ONNX, QONNX, FINN-ONNX) for the intermediate representations (IR), while the lower case naming (onnx, qonnx, finn) are usually used to refer to the compiler toolkits themselves.\n",
     "\n",
     "\n",
     "## 1.1 Using ModelWrapper to load and observe a model\n",
@@ -144,7 +146,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## 1.2 Converting to from QONNX to FINN using ConvertQONNXtoFINN\n",
+    "## 1.2 Converting from QONNX to FINN-ONNX using ConvertQONNXtoFINN\n",
     "\n",
     "At this point, we would like to move from the QONNX IR onto the FINN-ONNX IR. We can do this by using the `ConvertQONNXtoFINN()` function on a QONNX model."
   ]
  },
  {
@@ -155,7 +157,6 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "\n",
     "# Setting the input datatype explicitly because it doesn't get derived from the export function\n",
     "model.set_tensor_datatype(model.graph.input[0].name, DataType[\"BIPOLAR\"])\n",
     "\n",
     "# Calling the actual QONNX -> FINN transformation\n",
     "model = model.transform(ConvertQONNXtoFINN())"
   ]
  },
  {
@@ -198,7 +199,14 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Notice that the input and output tensor datatypes now correctly show `BIPOLAR` while the operator types have also heavily changed compared to the QONNX version. This is because in FINN, we use operators more suitable for FPGA implementations. `ConvertQONNXtoFINN` internally called many transformations which change the operators in such a manner and we can actually peek at the source code to see them using the `showSrc` function."
+    "Notice that the input and output tensor datatypes now correctly show `BIPOLAR` while the operator types have also heavily changed compared to the QONNX version. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`ConvertQONNXtoFINN` internally called many transformations which change the operators in such a manner and we can actually peek at the source code to see them using the `showSrc` function." ] }, { @@ -215,7 +223,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As we can see, `ConvertQONNXtoFINN` turned `Gemm` operation into `MatMuls` using the `GemmToMatMul()` transform and turned `Quant` nodes into `Thresholding` nodes using the `ConvertQuantActToMultiThreshold()` transform to name a few. However, these nodes do need further transformations before they can be turned into FPGA operators, which we handle in the next step." + "As we can see, `ConvertQONNXtoFINN` turned `Gemm` operation into `MatMuls` using the `GemmToMatMul()` transform and turned `Quant` nodes into `MultiThreshold` nodes using the `ConvertQuantActToMultiThreshold()` transform to name a few. However, these nodes do need further transformations before they can be turned into FPGA operators." ] }, { From 2d9deb58015dc890899835302ff38eda8f8653be Mon Sep 17 00:00:00 2001 From: lstasytis Date: Tue, 13 Aug 2024 12:35:16 +0100 Subject: [PATCH 11/51] [Deps] Update onnx commit hash --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 2033973f2a..6ce9ad76d4 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -27,7 +27,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -QONNX_COMMIT="fd61cfeebbdaba351abf7e9d54cd785d7776fa4f" +QONNX_COMMIT="2281a777d84aa5cbd7469085c2e534fb4a03ccf9" FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" BREVITAS_COMMIT="84f42259ec869eb151af4cb8a8b23ad925f493db" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" From f32cce868d373e97b80a071c0520cea6b58aa4aa Mon Sep 17 00:00:00 2001 From: lstasytis Date: Tue, 13 Aug 2024 13:55:28 +0100 Subject: [PATCH 12/51] Updated auto_pad_to_explicit_padding function path --- .../fpgadataflow/infer_pixel_padding_deconv.py | 5 ++--- .../test_fpgadataflow_convinputgenerator_rtl_dynamic.py | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/infer_pixel_padding_deconv.py b/src/finn/transformation/fpgadataflow/infer_pixel_padding_deconv.py index 8dbf7071fc..e1dcf1dde5 100644 --- a/src/finn/transformation/fpgadataflow/infer_pixel_padding_deconv.py +++ b/src/finn/transformation/fpgadataflow/infer_pixel_padding_deconv.py @@ -2,8 +2,7 @@ import warnings from onnx import TensorProto, helper from qonnx.transformation.base import Transformation -from qonnx.transformation.lower_convs_to_matmul import _auto_pad_to_explicit_padding -from qonnx.util.basic import get_by_name +from qonnx.util.basic import auto_pad_to_explicit_padding, get_by_name class InferPixelPaddingDeconv(Transformation): @@ -61,7 +60,7 @@ def apply(self, model): # use specified padding pad = get_by_name(n.attribute, "pads").ints else: - pad = _auto_pad_to_explicit_padding( + pad = auto_pad_to_explicit_padding( auto_pad, ifm_dim_h, ifm_dim_w, diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py index 9c45b06f4a..02c86d9972 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py @@ 
-43,8 +43,8 @@ from qonnx.transformation.infer_shapes import InferShapes from qonnx.transformation.lower_convs_to_matmul import ( LowerConvsToMatMul, - _auto_pad_to_explicit_padding, ) +from qonnx.util.basic import auto_pad_to_explicit_padding from qonnx.util.basic import gen_finn_dt_tensor, get_by_name, qonnx_make_model import finn.core.onnx_exec as oxe @@ -69,11 +69,11 @@ def create_conv_model(idim_h, idim_w, ifm, k, stride, ofm, idt, wdt, pad_mode, d group = ifm if depthwise else 1 group_str = str(group) ishp = (1, ifm, idim_h, idim_w) - pad_0 = _auto_pad_to_explicit_padding(pad_mode, idim_h, idim_w, k, k, stride, stride, 2) + pad_0 = auto_pad_to_explicit_padding(pad_mode, idim_h, idim_w, k, k, stride, stride, 2) int_dim_h = compute_conv_output_dim(idim_h, k, stride, total_pad=pad_0[0] + pad_0[2]) int_dim_w = compute_conv_output_dim(idim_w, k, stride, total_pad=pad_0[1] + pad_0[3]) - pad_1 = _auto_pad_to_explicit_padding(pad_mode, int_dim_h, int_dim_w, k, k, stride, stride, 2) + pad_1 = auto_pad_to_explicit_padding(pad_mode, int_dim_h, int_dim_w, k, k, stride, stride, 2) odim_h = compute_conv_output_dim(int_dim_h, k, stride, total_pad=pad_1[0] + pad_1[2]) odim_w = compute_conv_output_dim(int_dim_w, k, stride, total_pad=pad_1[1] + pad_1[3]) oshp = (1, ifm, odim_h, odim_w) if depthwise else (1, ofm, odim_h, odim_w) From f71f4ba55b3fd6f0b39e9e5166b86a54d135e9ba Mon Sep 17 00:00:00 2001 From: lstasytis1 Date: Tue, 13 Aug 2024 13:24:35 +0000 Subject: [PATCH 13/51] linted the commit --- ...test_fpgadataflow_convinputgenerator_rtl_dynamic.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py index 02c86d9972..26ce8f5f0e 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py @@ -41,11 +41,13 @@ from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes -from qonnx.transformation.lower_convs_to_matmul import ( - LowerConvsToMatMul, +from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul +from qonnx.util.basic import ( + auto_pad_to_explicit_padding, + gen_finn_dt_tensor, + get_by_name, + qonnx_make_model, ) -from qonnx.util.basic import auto_pad_to_explicit_padding -from qonnx.util.basic import gen_finn_dt_tensor, get_by_name, qonnx_make_model import finn.core.onnx_exec as oxe import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw From e22201f800a573b88d55f9b0024454a8e10fa0d4 Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 16 Aug 2024 16:15:40 +0100 Subject: [PATCH 14/51] [HWop-MVAU] Ensure shape is compatible in execution function --- src/finn/custom_op/fpgadataflow/matrixvectoractivation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py index 1c86ae7b7a..8f0a987bce 100644 --- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py @@ -130,6 +130,8 @@ def get_nodeattr_types(self): def execute_node(self, context, graph): node = self.onnx_node in_act = context[node.input[0]] + # ensure that shape is compatible + in_act = 
in_act.reshape(self.get_normal_input_shape()) mvau_w_init = [x for x in graph.initializer if x.name == node.input[1]][0] mvau_w = np_helper.to_array(mvau_w_init) # Matrix multiplication From ec7be72196b5a4fa2e10cd2afcd83bada1977893 Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 10 Jun 2024 16:19:31 +0100 Subject: [PATCH 15/51] [Transform] Skip broadcasting of thresholds in onnx conversion and extend conversion to hw layers --- .../fpgadataflow/convert_to_hw_layers.py | 74 +++++++++++++++++-- .../qonnx/qonnx_activation_handlers.py | 9 ++- 2 files changed, 71 insertions(+), 12 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index e14181b140..ea5025a098 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -529,6 +529,60 @@ def apply(self, model): graph = model.graph node_ind = 0 graph_modified = False + # check first if global input is split + successors = model.find_consumers(graph.input[0].name) + dt = model.get_tensor_datatype(graph.input[0].name) + if successors is not None and len(successors) >= 2 and dt.is_integer(): + output_tensor = graph.input[0].name + n_outputs = len(successors) + dt = model.get_tensor_datatype(output_tensor) + + # create clone tensors + out_shape = model.get_tensor_shape(output_tensor) + out_tensor_clones = [] + for i in range(n_outputs): + clone = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape + ) + model.graph.value_info.append(clone) + out_tensor_clones += [clone.name] + + num_ch = int(out_shape[-1]) + vecs = out_shape[:-1] + + # create node with no parallelization first + pe = 1 + + dup_node = helper.make_node( + "DuplicateStreams", + [output_tensor], + out_tensor_clones, + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + NumChannels=num_ch, + PE=pe, + inputDataType=dt.name, + numInputVectors=vecs, + NumOutputStreams=n_outputs, + outFIFODepths=[2] * n_outputs, + name="DuplicateStreams_" + output_tensor, + ) + + graph.node.insert(0, dup_node) + + # connect successors to out tensor clone + clone_idx = 0 + for successor in successors: + for i, succ_input in enumerate(successor.input): + if succ_input == output_tensor: + successor.input[i] = out_tensor_clones[clone_idx] + clone_idx += 1 + # if one node has multiple connections to the same output + # find_direct_successors will return one node per input + # so break the inner loop will result in correct behaviour + break + graph_modified = True + for node in graph.node: node_ind += 1 successors = model.find_consumers(node.output[0]) @@ -1206,7 +1260,7 @@ def apply(self, model): graph_modified = False for node in graph.node: node_ind += 1 - if node.op_type == "Sub": + if node.op_type in ["Sub", "Add"]: in0 = node.input[0] in1 = node.input[1] result = node.output[0] @@ -1230,14 +1284,15 @@ def apply(self, model): if not (idt0.is_integer() and idt1.is_integer()): continue - eltwiseOp = "Sub" + eltwiseOp = node.op_type nodes_to_remove = [node] - # look for a downstream Abs node - res_consumer = model.find_consumer(result) - if (res_consumer is not None) and (res_consumer.op_type == "Abs"): - eltwiseOp = "AbsDiff" - result = res_consumer.output[0] - nodes_to_remove.append(res_consumer) + if node.op_type == "Sub": + # look for a downstream Abs node + res_consumer = model.find_consumer(result) + if (res_consumer is not None) and (res_consumer.op_type == "Abs"): + 
eltwiseOp = "AbsDiff" + result = res_consumer.output[0] + nodes_to_remove.append(res_consumer) # check layout and convert if necessary in0_layout = model.get_tensor_layout(in0) @@ -1438,6 +1493,9 @@ def apply(self, model): if n.op_type == "MatMul" and model.get_tensor_sparsity(n.input[1]) is None: mm_input = n.input[0] mm_weight = n.input[1] + # if mm_weight is not constant, skip node + if model.get_initializer(n.input[1]) is None: + continue mm_output = n.output[0] mm_in_shape = model.get_tensor_shape(mm_input) mm_out_shape = model.get_tensor_shape(mm_output) diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py index 323e391df4..92a9731c2a 100644 --- a/src/finn/transformation/qonnx/qonnx_activation_handlers.py +++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py @@ -537,11 +537,12 @@ def _calculate_thresholds(self): for t in range(num_thresholds): thresholds[c][t] = min_threshold[c] + step[c] * t - # ToDo: The index 1 needs to be changed to -1 for the channels last format + # currently only per tensor or per channel quantization is supported num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1] - final_shape = (num_output_channels, num_thresholds) - if thresholds.shape != final_shape: - thresholds = np.broadcast_to(thresholds, final_shape) + assert ( + thresholds.shape[0] == 1 or thresholds.shape[0] == num_output_channels + ), """Quant node cannot be converted to MultiThreshold because only + per tensor or per channel quantization supported.""" return thresholds From e1d1f63732c09863e753511dc7229396938528e0 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 22 Aug 2024 09:21:43 +0100 Subject: [PATCH 16/51] [Transform] Add comment to streamingeltwise conversion --- src/finn/transformation/fpgadataflow/convert_to_hw_layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index ea5025a098..25a2032aeb 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1251,8 +1251,8 @@ def apply(self, model): class InferStreamingEltwise(Transformation): - """Convert eltwise Sub or Sub -> Abs to StreamingEltwise layer - with SubEltwise or AbsDiffEltwise op.""" + """Convert eltwise Add, Sub or Sub -> Abs to StreamingEltwise layer + with AddEltwise, SubEltwise or AbsDiffEltwise op.""" def apply(self, model): graph = model.graph From 188bf1715eb10e555da62f5d873408e43ea4dac8 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 20 Jun 2024 10:49:44 +0100 Subject: [PATCH 17/51] [Util] Add v80 to versal list --- src/finn/util/basic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 91c191962f..0cb029a888 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -292,10 +292,10 @@ def memutil(req_mem_spec, primitive_spec): def is_versal(fpgapart): """Returns whether board is part of the Versal family""" - return ( - fpgapart[0:4] in ["xcvc", "xcve", "xcvp", "xcvm", "xqvc", "xqvm"] - or fpgapart[0:5] == "xqrvc" - ) + return fpgapart[0:4] in ["xcvc", "xcve", "xcvp", "xcvm", "xqvc", "xqvm"] or fpgapart[0:5] in [ + "xqrvc", + "xcv80", + ] def get_dsp_block(fpgapart): From 380d2ac00a91600fb39a06dcd27eaf50a9fd4a6f Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 8 Jul 2024 11:26:49 
+0100 Subject: [PATCH 18/51] [RTL MVAU] Allow for 4bit compute with dsp48 for versal devices --- .../custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py index 3e81aa93e0..d9ab501117 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py @@ -208,7 +208,10 @@ def _resolve_impl_style(self, dsp_block): weight_width = self.get_input_datatype(1).bitwidth() if dsp_block == "DSP58": - return "mvu_vvu_8sx9_dsp58" + if act_width <= 4 and weight_width <= 4: + return "mvu_4sx4u_dsp48e2" + else: + return "mvu_vvu_8sx9_dsp58" else: if act_width <= 4 and weight_width <= 4: if dsp_block == "DSP48E1": From 91cec4ee2df3adfdc41790e456a163fbc4b16585 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 27 Aug 2024 11:10:37 +0100 Subject: [PATCH 19/51] [Deps] Update Brevitas commit and update unpacking of brevitas tensor in test --- fetch-repos.sh | 2 +- tests/brevitas/test_brevitas_debug.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 6ce9ad76d4..1d7d86b71b 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -29,7 +29,7 @@ QONNX_COMMIT="2281a777d84aa5cbd7469085c2e534fb4a03ccf9" FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" -BREVITAS_COMMIT="84f42259ec869eb151af4cb8a8b23ad925f493db" +BREVITAS_COMMIT="89fca2f56b57650e77b8e400f9e579c065186ccd" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3" diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py index d6879a727b..3d059a6856 100644 --- a/tests/brevitas/test_brevitas_debug.py +++ b/tests/brevitas/test_brevitas_debug.py @@ -35,6 +35,7 @@ import os import torch from brevitas.export import export_qonnx +from brevitas.quant_tensor import _unpack_quant_tensor from pkgutil import get_data from qonnx.core.modelwrapper import ModelWrapper from qonnx.util.cleanup import cleanup as qonnx_cleanup @@ -90,7 +91,7 @@ def test_brevitas_debug(QONNX_FINN_conversion): else: assert len(names_common) == 8 for dbg_name in names_common: - tensor_pytorch = dbg_hook.values[dbg_name].value.detach().numpy() + tensor_pytorch = _unpack_quant_tensor(dbg_hook.values[dbg_name]).detach().numpy() tensor_finn = output_dict[dbg_name] assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all() os.remove(finn_onnx) From f6b1e2b6bb01428f96701c467ce76d774bda2c6c Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 5 Sep 2024 10:49:10 +0100 Subject: [PATCH 20/51] [Deps] Update brevitas commit hash --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 1d7d86b71b..a4fc124fa4 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -29,7 +29,7 @@ QONNX_COMMIT="2281a777d84aa5cbd7469085c2e534fb4a03ccf9" FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" -BREVITAS_COMMIT="89fca2f56b57650e77b8e400f9e579c065186ccd" +BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3" From 4a776ec99504bee33f20c5ec162b5828e222a14c Mon Sep 17 00:00:00 
2001 From: Remo Senekowitsch Date: Wed, 4 Sep 2024 10:45:29 +0200 Subject: [PATCH 21/51] Update links to finn-base repo Signed-off-by: Remo Senekowitsch --- docs/finn/faq.rst | 2 +- .../cybersecurity/3-build-accelerator-with-finn.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/finn/faq.rst b/docs/finn/faq.rst index 70c2f24ed2..0d643feba3 100644 --- a/docs/finn/faq.rst +++ b/docs/finn/faq.rst @@ -81,7 +81,7 @@ Which data layout do FINN-generated accelerators use? Big-endian? Little-endian? If you need to do this manually, first examine how the `FINN PYNQ Python drivers `_ do this – notice how the input data is first reshaped to create the “folded input shape” that reflects the word size of the first layer based on how much it was parallelized, then data packing is applied to obtain a raw byte array (with some reversals going on) that can be - fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input `_ function, which converts npy arrays to lists of Python arbitrary-precision integers that we feed into pyverilator for rtl simulation. + fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input `_ function, which converts npy arrays to lists of Python arbitrary-precision integers that we feed into pyverilator for rtl simulation. Why does FIFO sizing take so long for my network? Is something wrong? The automatic FIFO sizing in FINN can take quite long. It unfortunately doesn’t really parallelize on multiple cores since diff --git a/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb b/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb index 73cd25cf20..28702d0286 100644 --- a/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb +++ b/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb @@ -78,7 +78,7 @@ "### Configuring the Board and FPGA Part \n", "\n", "* `fpga_part`: Xilinx FPGA part to be used for synthesis, can be left unspecified to be inferred from `board` below, or specified explicitly for e.g. out-of-context synthesis.\n", - "* `board`: target Xilinx Zynq or Alveo board for generating accelerators integrated into a shell. See the `pynq_part_map` and `alveo_part_map` dicts in [this file](https://github.com/Xilinx/finn-base/blob/dev/src/finn/util/basic.py#L41) for a list of possible boards.\n", + "* `board`: target Xilinx Zynq or Alveo board for generating accelerators integrated into a shell. 
See the `pynq_part_map` and `alveo_part_map` dicts in [this file](https://github.com/Xilinx/finn/blob/dev/src/finn/util/basic.py#L39) for a list of possible boards.\n", "* `shell_flow_type`: the target [shell flow type](https://finn-dev.readthedocs.io/en/latest/source_code/finn.builder.html#finn.builder.build_dataflow_config.ShellFlowType), only needed for generating full bitfiles where the FINN design is integrated into a shell (so only needed if `BITFILE` is selected) \n", "\n", "### Configuring the Performance \n", From ec5613c68f209202cf7fefb21d383b0072a2441f Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 19 Sep 2024 10:15:08 +0100 Subject: [PATCH 22/51] [InsertFIFO] Preserve onnx tensor dtype when inserting FIFOs --- src/finn/transformation/fpgadataflow/insert_fifo.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index 9df193efcf..21fb843052 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -29,7 +29,6 @@ import numpy as np import warnings -from onnx import TensorProto from onnx import helper as oh from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.base import Transformation @@ -114,6 +113,8 @@ def apply(self, model): # determine fifo node attributes fld_shape = n0.get_folded_output_shape() dtype = n0.get_output_datatype() + n0_otensor = model.get_tensor_valueinfo(output_name) + n0_tensor_dtype = n0_otensor.type.tensor_type.elem_type # check if folded_shape of output of first node and # input of the second node is equal @@ -145,7 +146,7 @@ def apply(self, model): # or unless create_shallow_fifos is specified fifo_output_tensor = oh.make_tensor_value_info( model.make_new_valueinfo_name(), - TensorProto.FLOAT, + n0_tensor_dtype, n0.get_normal_output_shape(), ) graph.value_info.append(fifo_output_tensor) @@ -196,13 +197,15 @@ def apply(self, model): fld_shape = n0.get_folded_input_shape(inp_ind) n_shape = n0.get_normal_input_shape(inp_ind) dtype = n0.get_input_datatype(inp_ind) + n0_itensor = model.get_tensor_valueinfo(graph_in_name) + n0_tensor_dtype = n0_itensor.type.tensor_type.elem_type fifo_depth = n0.get_nodeattr("inFIFODepths")[inp_ind] if fifo_depth > 2 or self.create_shallow_fifos: # create fifo node fifo_output_tensor = oh.make_tensor_value_info( model.make_new_valueinfo_name(), - TensorProto.FLOAT, + n0_tensor_dtype, n0.get_normal_input_shape(inp_ind), ) graph.value_info.append(fifo_output_tensor) @@ -256,13 +259,15 @@ def apply(self, model): fld_shape = n0.get_folded_output_shape(out_ind) n_shape = n0.get_normal_output_shape(out_ind) dtype = n0.get_output_datatype(out_ind) + n0_otensor = model.get_tensor_valueinfo(graph_out_name) + n0_tensor_dtype = n0_otensor.type.tensor_type.elem_type fifo_depth = n0.get_nodeattr("outFIFODepths")[out_ind] if fifo_depth > 2 or self.create_shallow_fifos: # create fifo node fifo_input_tensor = oh.make_tensor_value_info( model.make_new_valueinfo_name(), - TensorProto.FLOAT, + n0_tensor_dtype, n0.get_normal_output_shape(), ) graph.value_info.append(fifo_input_tensor) From fb600553d5618d36be334f7dd6c99dea789b0c83 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 19 Sep 2024 10:58:43 +0100 Subject: [PATCH 23/51] [InsertDWC] Preserve onnx tensor dtype when inserting DWCs --- src/finn/transformation/fpgadataflow/insert_dwc.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py index 33cc3e86d3..b56c8b74ea 100644 --- a/src/finn/transformation/fpgadataflow/insert_dwc.py +++ b/src/finn/transformation/fpgadataflow/insert_dwc.py @@ -26,7 +26,6 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from onnx import TensorProto from onnx import helper as oh from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.base import Transformation @@ -110,12 +109,15 @@ def apply(self, model): # determine shape for dwc dwc_shape = n0.get_normal_output_shape() - # determine dtype for dwc + # determine FINN dtype for dwc dtype = n0.get_output_datatype() + # determine onnx tensor dtype for dwc + n0_otensor = model.get_tensor_valueinfo(output_name) + n0_tensor_dtype = n0_otensor.type.tensor_type.elem_type dwc_output_tensor = oh.make_tensor_value_info( model.make_new_valueinfo_name(), - TensorProto.FLOAT, + n0_tensor_dtype, dwc_shape, ) graph.value_info.append(dwc_output_tensor) From 03830929697464666b58be717ece8328bc6c6965 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 13:28:15 +0000 Subject: [PATCH 24/51] [Fix] InferDuplicateStreamsLayer now properly handles forks of multiple-output nodes --- .../fpgadataflow/convert_to_hw_layers.py | 96 +++++++++---------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index 25a2032aeb..b02bc89db8 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -585,63 +585,63 @@ def apply(self, model): for node in graph.node: node_ind += 1 - successors = model.find_consumers(node.output[0]) - if successors is not None and len(successors) >= 2: - output_tensor = node.output[0] - n_outputs = len(successors) + for output_tensor in node.output: + successors = model.find_consumers(output_tensor) + if successors is not None and len(successors) >= 2: + n_outputs = len(successors) - dt = model.get_tensor_datatype(output_tensor) + dt = model.get_tensor_datatype(output_tensor) - # skip conversion for layers with float input - if not dt.is_integer(): - continue + # skip conversion for layers with float input + if not dt.is_integer(): + continue - # create clone tensors - out_shape = model.get_tensor_shape(output_tensor) - out_tensor_clones = [] - for i in range(n_outputs): - clone = helper.make_tensor_value_info( - model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape - ) - model.graph.value_info.append(clone) - out_tensor_clones += [clone.name] + # create clone tensors + out_shape = model.get_tensor_shape(output_tensor) + out_tensor_clones = [] + for i in range(n_outputs): + clone = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape + ) + model.graph.value_info.append(clone) + out_tensor_clones += [clone.name] - num_ch = int(out_shape[-1]) - vecs = out_shape[:-1] + num_ch = int(out_shape[-1]) + vecs = out_shape[:-1] - # create node with no parallelization first - pe = 1 + # create node with no parallelization first + pe = 1 - dup_node = helper.make_node( - "DuplicateStreams", - [output_tensor], - out_tensor_clones, - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - NumChannels=num_ch, - PE=pe, - inputDataType=dt.name, - 
numInputVectors=vecs, - NumOutputStreams=n_outputs, - outFIFODepths=[2] * n_outputs, - name="DuplicateStreams_" + node.name, - ) + dup_node = helper.make_node( + "DuplicateStreams", + [output_tensor], + out_tensor_clones, + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + NumChannels=num_ch, + PE=pe, + inputDataType=dt.name, + numInputVectors=vecs, + NumOutputStreams=n_outputs, + outFIFODepths=[2] * n_outputs, + name="DuplicateStreams_" + node.name, + ) - graph.node.insert(node_ind, dup_node) + graph.node.insert(node_ind, dup_node) - # connect successors to out tensor clone - clone_idx = 0 - for successor in successors: - for i, succ_input in enumerate(successor.input): - if succ_input == output_tensor: - successor.input[i] = out_tensor_clones[clone_idx] - clone_idx += 1 - # if one node has multiple connections to the same output - # find_direct_successors will return one node per input - # so break the inner loop will result in correct behaviour - break + # connect successors to out tensor clone + clone_idx = 0 + for successor in successors: + for i, succ_input in enumerate(successor.input): + if succ_input == output_tensor: + successor.input[i] = out_tensor_clones[clone_idx] + clone_idx += 1 + # if one node has multiple connections to the same output + # find_direct_successors will return one node per input + # so break the inner loop will result in correct behaviour + break - graph_modified = True + graph_modified = True if graph_modified: model = model.transform(SortGraph()) From d13aa7e7debb21bd1d75b6dbb6eddc959b4ae8c8 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 13:48:43 +0000 Subject: [PATCH 25/51] [Fix] MoveScalarLinearPastInvariants, MakeMaxPoolNHWC, MakeScaleResizeNHWC transformations are checking whether the node to be moved is a fork node, in which case the MoveOpPastFork is called. MoveOpPastFork uses deepcopies of the original node. 
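The switch to deepcopies matters because a node rebuilt via `helper.make_node` silently loses any attributes that are not explicitly re-specified (the `perm` of a Transpose, for example). A rough standalone illustration of the difference (made-up tensor names, not part of the diff below):

    from copy import deepcopy
    from onnx import helper

    orig = helper.make_node("Transpose", ["inp"], ["outp"], perm=[0, 3, 1, 2])

    # rebuilding drops the perm attribute unless it is passed in again
    rebuilt = helper.make_node(orig.op_type, ["inp"], ["outp_branch0"])
    # a deepcopy keeps all attributes; only the output needs rewiring
    cloned = deepcopy(orig)
    cloned.output[:] = ["outp_branch0"]

    print(len(rebuilt.attribute))  # 0 -> perm is lost
    print(len(cloned.attribute))   # 1 -> perm is preserved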
--- src/finn/transformation/streamline/reorder.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 8ac2d7dad6..9a7e9d0723 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -29,6 +29,7 @@ import numpy as np import qonnx.core.data_layout as DataLayout import warnings +from copy import deepcopy from onnx import TensorProto from onnx import helper as oh from qonnx.core.datatype import DataType @@ -641,6 +642,10 @@ def apply(self, model): # if initializer is not scalar, skip if np.prod(init0.shape) != 1: continue + if model.is_fork_node(prod0): + model = model.transform(MoveOpPastFork(prod0.op_type)) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) # Flatten input if required if len(init0.shape) > 0: init0 = init0.flatten()[0] @@ -713,6 +718,12 @@ def apply(self, model): elif producer is not None and producer.op_type == "Transpose": perms = list(get_by_name(producer.attribute, "perm").ints) if perms == [0, 3, 1, 2]: + # check if the producer is a fork node + # (need to move it past the fork before this transform) + if model.is_fork_node(producer): + model = model.transform(MoveTransposePastFork()) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) ceil_mode = get_by_name(n.attribute, "ceil_mode") if ceil_mode is not None: ceil_mode = ceil_mode.i @@ -764,6 +775,12 @@ def apply(self, model): if producer is not None and producer.op_type == "Transpose": perms = list(get_by_name(producer.attribute, "perm").ints) if perms == [0, 3, 1, 2]: + # check if the producer is a fork node + # (need to move it past the fork before this transform) + if model.is_fork_node(producer): + model = model.transform(MoveTransposePastFork()) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) old_value = model.get_initializer(n.input[scales_ind]) new_value = np.array( [old_value[idx] for idx in (0, 2, 3, 1)], @@ -813,10 +830,9 @@ class MoveOpPastFork(Transformation): can be merged with nodes in the branches """ - def __init__(self, op_name_list, get_attrs_fxn=lambda x: {}): + def __init__(self, op_name_list): super().__init__() self.ops_to_move = op_name_list - self.get_attrs_fxn = get_attrs_fxn def apply(self, model): graph = model.graph @@ -859,11 +875,9 @@ def apply(self, model): new_param_name = model.make_new_valueinfo_name() new_inp_list = [n.input[0], new_param_name] model.set_initializer(new_param_name, op_init_param) - attrs = self.get_attrs_fxn(n) - # TODO use copy of original node instead to get attrs? 
- new_node = oh.make_node( - n.op_type, new_inp_list, [new_output_tensor_name], **attrs - ) + new_node = deepcopy(n) + new_node.input[:] = new_inp_list + new_node.output[:] = [new_output_tensor_name] graph.node.insert(node_ind, new_node) node_ind += 1 @@ -901,7 +915,7 @@ def __init__(self): class MoveTransposePastFork(MoveOpPastFork): def __init__(self): - super().__init__(["Transpose"], lambda x: {"perm": get_by_name(x.attribute, "perm").ints}) + super().__init__(["Transpose"]) class MoveMaxPoolPastMultiThreshold(Transformation): From 6223abe86c7d9aee43788825f3c19545dab0ea54 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 13:59:14 +0000 Subject: [PATCH 26/51] [Fix] InsertFIFO transform is fixed for the case of the last node in the graph being a fork node --- src/finn/transformation/fpgadataflow/insert_fifo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index 21fb843052..9ed0f51cd4 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -268,7 +268,7 @@ def apply(self, model): fifo_input_tensor = oh.make_tensor_value_info( model.make_new_valueinfo_name(), n0_tensor_dtype, - n0.get_normal_output_shape(), + n0.get_normal_output_shape(out_ind), ) graph.value_info.append(fifo_input_tensor) model.set_tensor_datatype(fifo_input_tensor.name, dtype) @@ -294,7 +294,7 @@ def apply(self, model): graph.node.append(fifo_node) # set fifo output tensor as new input tensor of second node - final_node.output[0] = fifo_input_tensor.name + final_node.output[out_ind] = fifo_input_tensor.name else: warnings.warn( """Output FIFO for %s has depth %d and won't From 11d8234fdcfb03c00a700dd3ba82cb88d6da66e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Wed, 25 Sep 2024 13:27:04 +0100 Subject: [PATCH 27/51] Harden lane width computations against 32-bit numeric overflow. --- finn-rtllib/mvu/mvu_8sx8u_dsp48.sv | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/finn-rtllib/mvu/mvu_8sx8u_dsp48.sv b/finn-rtllib/mvu/mvu_8sx8u_dsp48.sv index 107a00918e..dabb36647e 100644 --- a/finn-rtllib/mvu/mvu_8sx8u_dsp48.sv +++ b/finn-rtllib/mvu/mvu_8sx8u_dsp48.sv @@ -72,6 +72,10 @@ module mvu_8sx8u_dsp48 #( return res; endfunction : init_leave_loads + function int unsigned sum_width(input int unsigned n, input int unsigned w); + return w <= 16? $clog2(1 + n*(2**w - 1)) : w + $clog2(n); + endfunction : sum_width + // Pipeline for last indicator flag logic [1:5] L = '0; always_ff @(posedge clk) begin @@ -445,7 +449,7 @@ module mvu_8sx8u_dsp48 #( // Stage #4: Cross-SIMD Reduction // Count leaves reachable from each node - localparam leave_load_t LEAVE_LOAD = SIMD > 1 ? init_leave_loads() : '{ default: 0}; // SIMD=1 requires no adder tree, so zero-ing out, otherwise init_leave_loads ends up in infinite loop + localparam leave_load_t LEAVE_LOAD = SIMD > 1 ? init_leave_loads() : '{ default: 0 }; // SIMD=1 requires no adder tree, so zero-ing out, otherwise init_leave_loads ends up in infinite loop // Range of Cross-lane Contribution Tracked in Hi4 /* @@ -462,7 +466,7 @@ module mvu_8sx8u_dsp48 #( * signed value is determined by its lower bound to be at least: * 1 + $clog2(2^(w-1)+SIMD) */ - localparam int unsigned HI_WIDTH = 1 + $clog2(2**(ACCU_WIDTH-D[1]-1)+SIMD); + localparam int unsigned HI_WIDTH = 1 + ($clog2(SIMD) < ACCU_WIDTH-D[1]? 
ACCU_WIDTH-D[1] : $clog2(2**(ACCU_WIDTH-D[1]-1)+SIMD)); uwire signed [ACCU_WIDTH -1:0] up4; uwire signed [HI_WIDTH -1:0] hi4; @@ -504,12 +508,12 @@ module mvu_8sx8u_dsp48 #( // Conclusive low part accumulation if(i >= PE_REM) begin : blkLo // Adder Tree across all SIMD low contributions (all unsigned arithmetic) - localparam int unsigned ROOT_WIDTH = $clog2(1 + SIMD*(2**LO_WIDTH-1)); + localparam int unsigned ROOT_WIDTH = sum_width(SIMD, LO_WIDTH); uwire [2*SIMD-2:0][ROOT_WIDTH-1:0] tree; for(genvar s = 0; s < SIMD; s++) assign tree[SIMD-1+s] = p3[s][D[i]+:LO_WIDTH]; for(genvar n = 0; n < SIMD-1; n++) begin // Sum truncated to actual maximum bit width at this node - localparam int unsigned NODE_WIDTH = $clog2(1 + LEAVE_LOAD[n]*(2**LO_WIDTH-1)); + localparam int unsigned NODE_WIDTH = sum_width(LEAVE_LOAD[n], LO_WIDTH); uwire [NODE_WIDTH-1:0] s = tree[2*n+1] + tree[2*n+2]; assign tree[n] = s; end From 945a4a4c7e341b3d5acaa929e51672babe70bc36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Wed, 25 Sep 2024 13:33:04 +0100 Subject: [PATCH 28/51] Adding testbench having two accumulator sized run against one another. --- finn-rtllib/mvu/tb/mvu_accu_tb.dat | 192 +++++++++++++++++++++++++++++ finn-rtllib/mvu/tb/mvu_accu_tb.sv | 162 ++++++++++++++++++++++++ 2 files changed, 354 insertions(+) create mode 100644 finn-rtllib/mvu/tb/mvu_accu_tb.dat create mode 100644 finn-rtllib/mvu/tb/mvu_accu_tb.sv diff --git a/finn-rtllib/mvu/tb/mvu_accu_tb.dat b/finn-rtllib/mvu/tb/mvu_accu_tb.dat new file mode 100644 index 0000000000..7e102ab6ab --- /dev/null +++ b/finn-rtllib/mvu/tb/mvu_accu_tb.dat @@ -0,0 +1,192 @@ +9 +4 +d +9 +2 +a +d +7 +9 +7 +b +4 +4 +7 +0 +0 +c +9 +9 +1 +9 +0 +a +0 +5 +5 +7 +7 +2 +6 +7 +9 +0 +0 +9 +7 +7 +c +7 +9 +7 +1 +2 +0 +f +7 +1 +7 +f +7 +1 +7 +1 +6 +6 +9 +e +f +e +a +6 +1 +7 +9 +d +a +7 +7 +f +4 +7 +f +9 +f +9 +1 +9 +f +7 +3 +4 +1 +1 +0 +d +c +d +b +9 +9 +f +7 +0 +5 +e +6 +7 +e +7 +1 +7 +0 +e +3 +c +4 +9 +7 +9 +9 +d +e +c +1 +f +7 +0 +7 +1 +7 +d +0 +7 +e +a +1 +9 +4 +b +7 +9 +0 +a +e +6 +7 +2 +9 +0 +9 +0 +9 +1 +9 +0 +0 +7 +2 +7 +1 +5 +9 +1 +9 +6 +7 +c +1 +9 +d +9 +f +c +9 +9 +9 +b +b +9 +f +9 +5 +1 +3 +0 +9 +0 +9 +2 +a +9 +0 +f +0 +7 +0 +a +7 +3 +e +5 +7 diff --git a/finn-rtllib/mvu/tb/mvu_accu_tb.sv b/finn-rtllib/mvu/tb/mvu_accu_tb.sv new file mode 100644 index 0000000000..ceeb31194c --- /dev/null +++ b/finn-rtllib/mvu/tb/mvu_accu_tb.sv @@ -0,0 +1,162 @@ +/****************************************************************************** + * Copyright (C) 2024, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Testbench for MVU core compute kernel. + *****************************************************************************/ + +module mvu_accu_tb; + + localparam IS_MVU = 1; + localparam COMPUTE_CORE = "mvu_8sx8u_dsp48"; + localparam PUMPED_COMPUTE = 0; + localparam MW = 6; + localparam MH = 32; + localparam PE = 1; + localparam SIMD = 1; + localparam ACTIVATION_WIDTH = 8; + localparam WEIGHT_WIDTH = 4; + localparam NARROW_WEIGHTS = 1; + localparam SIGNED_ACTIVATIONS = 1; + localparam SEGMENTLEN = 1; + localparam FORCE_BEHAVIORAL = 0; + + // Safely deducible parameters + localparam WEIGHT_STREAM_WIDTH_BA = (PE*SIMD*WEIGHT_WIDTH+7)/8 * 8; + localparam INPUT_STREAM_WIDTH_BA = ((IS_MVU == 1 ? 1 : PE) * SIMD * ACTIVATION_WIDTH + 7) / 8 * 8; + + // Global Control + logic clk = 0; + always #5ns clk = !clk; + logic rst = 1; + initial begin + repeat(16) @(posedge clk); + rst <= 0; + end + + logic [WEIGHT_WIDTH-1:0] WeightMem[MH*MW]; + initial $readmemh("mvu_accu_tb.dat", WeightMem); + + // Shared Input Feed + logic [INPUT_STREAM_WIDTH_BA-1:0] in_TDATA; + logic in_TVALID[2]; + uwire in_TREADY[2]; + initial begin + in_TDATA = 'x; + in_TVALID = '{ default: 0 }; + @(posedge clk iff !rst); + + repeat(2161*MW) begin + automatic logic [ACTIVATION_WIDTH-1:0] a = $urandom(); + in_TDATA <= a; + in_TVALID <= '{ default: 1 }; + fork + begin + @(posedge clk iff in_TREADY[0]); + in_TVALID[0] <= 0; + end + begin + @(posedge clk iff in_TREADY[1]); + in_TVALID[1] <= 0; + end + join + end + + repeat(MH*MW) @(posedge clk); + $display("Test completed."); + $finish; + end + + // DUTs + localparam int unsigned ACCU_WIDTHS[2] = '{ 16, 32 }; + int OutQ[2][$]; + for(genvar i = 0; i < $size(ACCU_WIDTHS); i++) begin : genDUTs + localparam int unsigned ACCU_WIDTH = ACCU_WIDTHS[i]; + localparam int unsigned OUTPUT_STREAM_WIDTH_BA = (PE*ACCU_WIDTH + 7)/8 * 8; + + // Private Weight Feed + logic [WEIGHT_STREAM_WIDTH_BA-1:0] weights_TDATA; + logic weights_TVALID; + uwire weights_TREADY; + initial begin + weights_TDATA = 'x; + weights_TVALID = 0; + @(posedge clk iff !rst); + + weights_TVALID <= 1; + forever begin + for(int unsigned i = 0; i < MH*MW; i++) begin + weights_TDATA <= WeightMem[i]; + @(posedge clk iff weights_TREADY); + end + end + end + + // Private Output Capture into Queue + uwire signed [OUTPUT_STREAM_WIDTH_BA-1:0] out_TDATA; + uwire out_TVALID; + uwire out_TREADY = !rst; + always_ff @(posedge clk iff !rst) begin + if(out_TVALID) OutQ[i].push_back(out_TDATA); + end + + // Actual DUT Instance + mvu_vvu_axi #( + .IS_MVU(IS_MVU), .COMPUTE_CORE(COMPUTE_CORE), .PUMPED_COMPUTE(PUMPED_COMPUTE), .MW(MW), .MH(MH), .PE(PE), .SIMD(SIMD), + .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .WEIGHT_WIDTH(WEIGHT_WIDTH), 
.ACCU_WIDTH(ACCU_WIDTH), .NARROW_WEIGHTS(NARROW_WEIGHTS), + .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .SEGMENTLEN(SEGMENTLEN), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL) + ) dut ( + .ap_clk(clk), + .ap_clk2x(1'b0), + .ap_rst_n(!rst), + .s_axis_weights_tdata(weights_TDATA), + .s_axis_weights_tvalid(weights_TVALID), + .s_axis_weights_tready(weights_TREADY), + .s_axis_input_tdata(in_TDATA), + .s_axis_input_tvalid(in_TVALID[i]), + .s_axis_input_tready(in_TREADY[i]), + .m_axis_output_tdata(out_TDATA), + .m_axis_output_tvalid(out_TVALID), + .m_axis_output_tready(out_TREADY) + ); + end : genDUTs + + // Output Equivalence Checker + always_ff @(posedge clk) begin + if(OutQ[0].size && OutQ[1].size) begin + automatic int unsigned y0 = OutQ[0].pop_front(); + automatic int unsigned y1 = OutQ[1].pop_front(); + assert(y0 == y1) else begin + $error("Output Mismatch: %0d vs. %0d", y0, y1); + $stop; + end + end + end + +endmodule : mvu_accu_tb From 00c3a83aae2a28d75abc097d2655633fc7d55c0d Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 26 Sep 2024 11:44:18 +0100 Subject: [PATCH 29/51] [RoundThresh] Clean-up transformation and test files --- .../streamline/round_thresholds.py | 43 +-- .../streamline/test_round_thresholds.py | 263 +++++++++--------- 2 files changed, 132 insertions(+), 174 deletions(-) diff --git a/src/finn/transformation/streamline/round_thresholds.py b/src/finn/transformation/streamline/round_thresholds.py index 2666242730..ab986e7826 100644 --- a/src/finn/transformation/streamline/round_thresholds.py +++ b/src/finn/transformation/streamline/round_thresholds.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, Xilinx +# Copyright (c) 2020-2022, Xilinx +# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -26,22 +27,12 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# Need numpy for modifying the onnx graph tensors, which are numpy style arrays import numpy as np - -# QONNX wrapper of ONNX model graphs from qonnx.core.modelwrapper import ModelWrapper - -# QONNX graph transformation base class from qonnx.transformation.base import Transformation - -# Transformation running qonnx datatype inference from qonnx.transformation.infer_datatypes import InferDataTypes -# Rounds and clips thresholds to integer values if the node inputs are integer, -# respecting range, representability and data type (promotion) of the container -# data type class RoundAndClipThresholds(Transformation): """For MultiThreshold nodes operating on integer inputs, round up thresholds values to the nearest integer. Additionally, if the input @@ -50,29 +41,19 @@ class RoundAndClipThresholds(Transformation): annotation). 
Runs InferDataTypes() afterward to propagate any changes to the quantization data types.""" - # Applies the transform to a whole model graph def apply(self, model: ModelWrapper): # noqa - # Get the model graph out of the model wrapper object graph = model.graph - # Keep track of whether the graph has been modified graph_modified = False - # Iterate all nodes in the graph keeping track of the index for index, node in enumerate(graph.node): - # Applies to initializer tensors of MultiThreshold operations - if node.op_type == "MultiThreshold": - # Try to get the thresholds initializer tensor + op_type = node.op_type + if op_type == "MultiThreshold": thresholds = model.get_initializer(node.input[1]) - # There might be no constant thresholds stored as initializer - # tensor inside the model if thresholds is None: - # Nothing we can do, skip to the next node continue - # Get the data type of the inputs to this operation dtype = model.get_tensor_datatype(node.input[0]) # This transformation only applies to thresholding operations # operating on integer inputs if not dtype.is_integer(): - # Nothing we can do, skip to the next node continue # Round thresholds up to nearest integer and clip thresholds # outside the input range @@ -80,24 +61,14 @@ def apply(self, model: ModelWrapper): # noqa # introduce extra inaccuracies due to large integers not being # exactly representable in floating-point representation. # See for example: np.ceil(np.float32(16777217)) == 16777216 - # fmt: off - new_thresholds = np.clip( - np.ceil(thresholds), dtype.min(), dtype.max() - ) - # fmt: on + new_thresholds = np.clip(np.ceil(thresholds), dtype.min(), dtype.max()) # Convert back to the preferred float32 container type - # Note: np.clip might have promoted the thresholds to float64 - # TODO: Maybe consider an int64 container type for thresholds - # rounded to integer? Need to check all other transformations - # and code generation through the whole FINN and QONNX stack - # first, as these probably assume a float32 container type. new_thresholds = new_thresholds.astype(np.float32) # Insert the rounded and clipped thresholds back into the model model.set_initializer(node.input[1], new_thresholds) # The rounded and clipped thresholds now fit into the input data # type model.set_tensor_datatype(node.input[1], dtype) - # Test whether the new thresholds actually differ from the old # ones if np.any(new_thresholds != thresholds): # Track the graph has been modified to inform the transform @@ -107,9 +78,5 @@ def apply(self, model: ModelWrapper): # noqa # Immediately exit here to propagate the data type changes # before considering the next node break - # Some data types might have changed, do one pass of data type inference - # to propagate these changes through the graph model = model.transform(InferDataTypes()) - # Return the transformed model and indicate whether the graph actually - # has been transformed to exhaustively apply this transformation again. return model, graph_modified diff --git a/tests/transformation/streamline/test_round_thresholds.py b/tests/transformation/streamline/test_round_thresholds.py index 63375598a0..7e2d39176e 100644 --- a/tests/transformation/streamline/test_round_thresholds.py +++ b/tests/transformation/streamline/test_round_thresholds.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, Xilinx +# Copyright (c) 2020-2022, Xilinx, Inc. +# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -26,32 +27,15 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# fmt: off -# Disable formatter. This is deliberately formatted to stay within 80 characters -# per line. Black, however, formats some lines going beyond this. - -# Testing framework import pytest -# Use numpy for python execution / computing the ground truth expected values import numpy as np - -# Utility types and function for creating onnx nodes and graphs from onnx import TensorProto, helper - -# QONNX data types like INT25 from qonnx.core.datatype import DataType - -# QONNX wrapper of ONNX model graphs from qonnx.core.modelwrapper import ModelWrapper - -# Generate random tensors of QONNX/FINN data types for testing from qonnx.util.basic import gen_finn_dt_tensor -# Execution of onnx graphs within FINN import finn.core.onnx_exec as oxe - -# The transformation to be tested from finn.transformation.streamline import RoundAndClipThresholds @@ -59,173 +43,186 @@ # data type combinations with purely integer inputs. Without proper rounding, # this tests only the clipping, range and type-casting behavior of the # transformation. -@pytest.mark.parametrize("i_dtype", [ - # Explanation for selecting these test configurations: - # 1. Below 24-bit thresholds we will not observe any interesting rounding - # behavior, as all integers < 2^24 can be exactly represented in 32-bit - # floating-point. Thus, we test thresholds at 25-bit signed integers and - # generate test inputs slightly above and below this. - # 2. We want to test out-of-range clipping of thresholds, in particular - # clipping of the negative portion of signed thresholds. Thus, we only - # generate signed thresholds, but test with signed and unsigned - # inputs of smaller, larger and equal range. - # 3. Testing proper floating-point thresholds requires a separate test-case - "INT23", "UINT23", "INT24", "UINT24", "INT25", "UINT25", "INT26", "UINT26" -]) -@pytest.mark.parametrize("o_dtype", [ - # Explanation for selecting these test configurations: - # 1. Outputs of MultiThreshold are typically much smaller bit-width than the - # inputs and thresholds. - # 2. However, with randomly samples thresholds from a rather large range due - # to the selected input bit-widths (see above), we risk not adequately - # covering the input range if we sample too few thresholds. The number of - # thresholds sampled depends on the bit-width of the output, thus we use - # rather high bit-width for testing. - # 3. For a "real" model, the quantization procedure *should* take care of - # adequately covering the true input range. - "INT8", "UINT8" -]) -@pytest.mark.parametrize("n_elems", [ - # Explanation for selecting these test configurations: - # 1. Small edge cases and quickly running through tests: 1, 2, 3, 4 - # 2. Large test case 256, hopefully amplifying any rarely occurring errors - 1, 2, 3, 4, 256 -]) +@pytest.mark.parametrize( + "i_dtype", + [ + # Explanation for selecting these test configurations: + # 1. Below 24-bit thresholds we will not observe any interesting rounding + # behavior, as all integers < 2^24 can be exactly represented in 32-bit + # floating-point. Thus, we test thresholds at 25-bit signed integers and + # generate test inputs slightly above and below this. + # 2. We want to test out-of-range clipping of thresholds, in particular + # clipping of the negative portion of signed thresholds. 
Thus, we only
+        #    generate signed thresholds, but test with signed and unsigned
+        #    inputs of smaller, larger and equal range.
+        # 3. Testing proper floating-point thresholds requires a separate test-case
+        "INT23",
+        "UINT23",
+        "INT24",
+        "UINT24",
+        "INT25",
+        "UINT25",
+        "INT26",
+        "UINT26",
+    ],
+)
+@pytest.mark.parametrize(
+    "o_dtype",
+    [
+        # Explanation for selecting these test configurations:
+        # 1. Outputs of MultiThreshold are typically much smaller bit-width than the
+        #    inputs and thresholds.
+        # 2. However, with randomly sampled thresholds from a rather large range due
+        #    to the selected input bit-widths (see above), we risk not adequately
+        #    covering the input range if we sample too few thresholds. The number of
+        #    thresholds sampled depends on the bit-width of the output, thus we use
+        #    rather high bit-width for testing.
+        # 3. For a "real" model, the quantization procedure *should* take care of
+        #    adequately covering the true input range.
+        "INT8",
+        "UINT8",
+    ],
+)
+@pytest.mark.parametrize(
+    "n_elems",
+    [
+        # Explanation for selecting these test configurations:
+        # 1. Small edge cases and quickly running through tests: 1, 2, 3, 4
+        # 2. Large test case 256, hopefully amplifying any rarely occurring errors
+        1,
+        2,
+        3,
+        4,
+        256,
+    ],
+)
 def test_round_and_clip_thresholds_ints(i_dtype, o_dtype, n_elems):
-    # Convert string representation of data type to onnx DataType
     i_dtype = DataType[i_dtype]
     t_dtype = DataType["INT25"]  # Note: Matches configuration above
     o_dtype = DataType[o_dtype]  # noqa: Duplicate model setup code
-    # Create a dummy MultiThreshold operation to be tested
     node = helper.make_node(
-        # Op-Type of the node
         "MultiThreshold",
-        # MultiThreshold is implemented under the qonnx domain
         domain="qonnx.custom_op.general",
-        # List the names of the input tensors
         inputs=["inp", "thresholds"],
-        # List the names of the output tensors
         outputs=["out"],
-        # The CustomOp needs to know the data type of the output to be produced
-        out_dtype=str(o_dtype)
+        out_dtype=str(o_dtype),
     )
-    # Number of threshold values required to produce outputs of type o_dtype
     n_thresholds = o_dtype.get_num_possible_values() - 1
-    # Create tensor value infos for all input/output tensors involved
     inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, n_elems])
     out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, n_elems])
-    # Create a tensor value info for the thresholds parameter tensor
-    # Note: Number of thresholds is determined by the output data type
     thresholds = helper.make_tensor_value_info(
         "thresholds", TensorProto.FLOAT, [n_elems, n_thresholds]
     )
-    # Combine node and tensor value infos into an onnx graph
     graph = helper.make_graph([node], "thresholds", [inp, thresholds], [out])
-    # Wrap the model graph in a ModelWrapper container
     model = ModelWrapper(helper.make_model(graph))
-    # Sample random tensors of the configured input data type
+
     inp = gen_finn_dt_tensor(i_dtype, [1, n_elems])
-    # Generate sorted thresholds for each of the input channels
     thresholds = np.sort(gen_finn_dt_tensor(t_dtype, [n_elems, n_thresholds]))
-    # Set data type annotations for the input and thresholds tensor
     model.set_tensor_datatype("inp", i_dtype)  # noqa: Duplicate model execution
     model.set_tensor_datatype("thresholds", t_dtype)
     model.set_tensor_datatype("out", o_dtype)
-    # Set the thresholds as initializer input to the model
     model.set_initializer("thresholds", thresholds)
+
     # Execute the model before running the RoundAndClipThresholds transformation
     out_expected = 
oxe.execute_onnx(model, {"inp": inp})["out"] - # Before rounding the threshold data type must be as annotated assert model.get_tensor_datatype("thresholds") == t_dtype - # Run the transformation to be tested + model = model.transform(RoundAndClipThresholds()) + # After this transformation, the thresholds and output data type should be # inferred correctly assert model.get_tensor_datatype("thresholds") == i_dtype assert model.get_tensor_datatype("out") == o_dtype + # After this transformation, the container type used to store the thresholds # values must be float32. No other type-cast or type promotion may happen. assert model.get_initializer("thresholds").dtype == np.float32 + # After rounding, all thresholds must be integers represented as float32 - assert all( - x.is_integer() for x in model.get_initializer("thresholds").flatten() - ) + assert all(x.is_integer() for x in model.get_initializer("thresholds").flatten()) + # Execute the model after running the RoundAndClipThresholds transformation out_produced = oxe.execute_onnx(model, {"inp": inp})["out"] - # Compare the results before and after: This is the pure integer test-case - # and no actual rounding should happen, thus the rounded operation should - # produce outputs exactly equal. + assert np.all(out_produced == out_expected) # Tests the RoundAndClipThresholds transformation under various input, output # data type combinations with purely integer inputs. This test case tests actual # rounding of floating-point thresholds. -@pytest.mark.parametrize("i_dtype", [ - # Explanation for selecting these test configurations: - # 1. Below 24-bit thresholds we will not observe any interesting rounding - # behavior, as all integers < 2^24 can be exactly represented in 32-bit - # floating-point. Thus, we test thresholds at 25-bit signed integers and - # generate test inputs slightly above and below this. - # 2. We want to test out-of-range clipping of thresholds, in particular - # clipping of the negative portion of signed thresholds. Thus, we only - # generate signed thresholds, but test with signed and unsigned - # inputs of smaller, larger and equal range. - # 3. Testing proper floating-point thresholds requires a separate test-case - "INT23", "UINT23", "INT24", "UINT24", "INT25", "UINT25", "INT26", "UINT26" -]) -@pytest.mark.parametrize("o_dtype", [ - # Explanation for selecting these test configurations: - # 1. Outputs of MultiThreshold are typically much smaller bit-width than the - # inputs and thresholds. - # 2. However, with randomly samples thresholds from a rather large range due - # to the selected input bit-widths (see above), we risk not adequately - # covering the input range if we sample too few thresholds. The number of - # thresholds sampled depends on the bit-width of the output, thus we use - # rather high bit-width for testing. - # 3. For a "real" model, the quantization procedure *should* take care of - # adequately covering the true input range. - "INT8", "UINT8" -]) -@pytest.mark.parametrize("n_elems", [ - # Explanation for selecting these test configurations: - # 1. Small edge cases and quickly running through tests: 1, 2, 3, 4 - # 2. Large test case 256, hopefully amplifying any rarely occurring errors - 1, 2, 3, 4, 256 -]) +@pytest.mark.parametrize( + "i_dtype", + [ + # Explanation for selecting these test configurations: + # 1. Below 24-bit thresholds we will not observe any interesting rounding + # behavior, as all integers < 2^24 can be exactly represented in 32-bit + # floating-point. 
Thus, we test thresholds at 25-bit signed integers and
+        #    generate test inputs slightly above and below this.
+        # 2. We want to test out-of-range clipping of thresholds, in particular
+        #    clipping of the negative portion of signed thresholds. Thus, we only
+        #    generate signed thresholds, but test with signed and unsigned
+        #    inputs of smaller, larger and equal range.
+        # 3. Testing proper floating-point thresholds requires a separate test-case
+        "INT23",
+        "UINT23",
+        "INT24",
+        "UINT24",
+        "INT25",
+        "UINT25",
+        "INT26",
+        "UINT26",
+    ],
+)
+@pytest.mark.parametrize(
+    "o_dtype",
+    [
+        # Explanation for selecting these test configurations:
+        # 1. Outputs of MultiThreshold are typically much smaller bit-width than the
+        #    inputs and thresholds.
+        # 2. However, with randomly sampled thresholds from a rather large range due
+        #    to the selected input bit-widths (see above), we risk not adequately
+        #    covering the input range if we sample too few thresholds. The number of
+        #    thresholds sampled depends on the bit-width of the output, thus we use
+        #    rather high bit-width for testing.
+        # 3. For a "real" model, the quantization procedure *should* take care of
+        #    adequately covering the true input range.
+        "INT8",
+        "UINT8",
+    ],
+)
+@pytest.mark.parametrize(
+    "n_elems",
+    [
+        # Explanation for selecting these test configurations:
+        # 1. Small edge cases and quickly running through tests: 1, 2, 3, 4
+        # 2. Large test case 256, hopefully amplifying any rarely occurring errors
+        1,
+        2,
+        3,
+        4,
+        256,
+    ],
+)
 def test_round_and_clip_thresholds_floats(i_dtype, o_dtype, n_elems):
-    # Convert string representation of data type to onnx DataType
     i_dtype = DataType[i_dtype]
     t_dtype = DataType["FLOAT32"]
     o_dtype = DataType[o_dtype]  # noqa: Duplicate model setup code
-    # Create a dummy MultiThreshold operation to be tested
     node = helper.make_node(
-        # Op-Type of the node
         "MultiThreshold",
-        # MultiThreshold is implemented under the qonnx domain
         domain="qonnx.custom_op.general",
-        # List the names of the input tensors
         inputs=["inp", "thresholds"],
-        # List the names of the output tensors
         outputs=["out"],
-        # The CustomOp needs to know the data type of the output to be produced
-        out_dtype=str(o_dtype)
+        out_dtype=str(o_dtype),
     )
-    # Number of threshold values required to produce outputs of type o_dtype
     n_thresholds = o_dtype.get_num_possible_values() - 1
-    # Create tensor value infos for all input/output tensors involved
     inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, n_elems])
     out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, n_elems])
-    # Create a tensor value info for the thresholds parameter tensor
-    # Note: Number of thresholds is determined by the output data type
     thresholds = helper.make_tensor_value_info(
         "thresholds", TensorProto.FLOAT, [n_elems, n_thresholds]
     )
-    # Combine node and tensor value infos into an onnx graph
     graph = helper.make_graph([node], "thresholds", [inp, thresholds], [out])
-    # Wrap the model graph in a ModelWrapper container
     model = ModelWrapper(helper.make_model(graph))
-    # Sample random tensors of the configured input data type
+
     inp = gen_finn_dt_tensor(i_dtype, [1, n_elems])
     # Draw uniformly random prototype thresholds in [0,+1] range
     thresholds = np.random.rand(n_elems, n_thresholds)
@@ -238,30 +235,24 @@ def test_round_and_clip_thresholds_floats(i_dtype, o_dtype, n_elems):
     model.set_tensor_datatype("inp", i_dtype)  # noqa: Duplicate model execution
     model.set_tensor_datatype("thresholds", t_dtype)
     model.set_tensor_datatype("out", 
o_dtype) - # Set the thresholds as initializer input to the model model.set_initializer("thresholds", thresholds) + # Execute the model before running the RoundAndClipThresholds transformation out_expected = oxe.execute_onnx(model, {"inp": inp})["out"] # Before rounding the threshold data type must be as annotated assert model.get_tensor_datatype("thresholds") == t_dtype - # Run the transformation to be tested + model = model.transform(RoundAndClipThresholds()) - # After this transformation, the thresholds and output data type should be - # inferred correctly + assert model.get_tensor_datatype("thresholds") == i_dtype assert model.get_tensor_datatype("out") == o_dtype + # After this transformation, the container type used to store the thresholds # values must be float32. No other type-cast or type promotion may happen. assert model.get_initializer("thresholds").dtype == np.float32 # After rounding, all thresholds must be integers represented as float32 - assert all( - x.is_integer() for x in model.get_initializer("thresholds").flatten() - ) - # Execute the model after running the RoundAndClipThresholds transformation + assert all(x.is_integer() for x in model.get_initializer("thresholds").flatten()) + out_produced = oxe.execute_onnx(model, {"inp": inp})["out"] - # Compare the results before and after: This is the floating-point test with - # actual rounding, this the transformed result may only be equal within some - # tolerance. - # Hm, never observed this to be relevant. For all test configurations, exact - # equality seems to hold, probably due to only integer inputs being tested. + assert np.allclose(out_produced, out_expected, atol=1.0e-3) From 717bfc13e2361e767c220a3d298245f04cfd84ef Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 26 Sep 2024 12:57:06 +0100 Subject: [PATCH 30/51] [RoundThresh] Expand rounding of thresholds to hw layers --- src/finn/builder/build_dataflow_steps.py | 2 ++ src/finn/transformation/streamline/round_thresholds.py | 2 +- tests/end2end/test_end2end_bnn_pynq.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index bdbcc53d83..ab2280554c 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -121,6 +121,7 @@ ) from finn.transformation.streamline import Streamline from finn.transformation.streamline.reorder import MakeMaxPoolNHWC +from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds from finn.util.basic import ( get_rtlsim_trace_depth, pyverilate_get_liveness_threshold_cycles, @@ -503,6 +504,7 @@ def step_minimize_bit_width(model: ModelWrapper, cfg: DataflowBuildConfig): if cfg.minimize_bit_width: model = model.transform(MinimizeWeightBitWidth()) model = model.transform(MinimizeAccumulatorWidth()) + model = model.transform(RoundAndClipThresholds()) # make sure the changed datatypes are propagated through the network model = model.transform(InferDataTypes()) return model diff --git a/src/finn/transformation/streamline/round_thresholds.py b/src/finn/transformation/streamline/round_thresholds.py index ab986e7826..907f127896 100644 --- a/src/finn/transformation/streamline/round_thresholds.py +++ b/src/finn/transformation/streamline/round_thresholds.py @@ -46,7 +46,7 @@ def apply(self, model: ModelWrapper): # noqa graph_modified = False for index, node in enumerate(graph.node): op_type = node.op_type - if op_type == "MultiThreshold": + if op_type == "MultiThreshold" or 
op_type.startswith("Thresholding"): thresholds = model.get_initializer(node.input[1]) if thresholds is None: continue diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py index 81c6316ec1..0d3418624a 100644 --- a/tests/end2end/test_end2end_bnn_pynq.py +++ b/tests/end2end/test_end2end_bnn_pynq.py @@ -94,6 +94,7 @@ MakeMaxPoolNHWC, MoveScalarLinearPastInvariants, ) +from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds from finn.util.basic import get_finn_root, make_build_dir, test_board_map from finn.util.pytorch import ToTensor from finn.util.test import ( @@ -672,6 +673,7 @@ def test_minimize_bit_width(self, topology, wbits, abits, board): model = load_test_checkpoint_or_skip(prev_chkpt_name) model = model.transform(MinimizeAccumulatorWidth()) model = model.transform(MinimizeWeightBitWidth()) + model = model.transform(RoundAndClipThresholds()) curr_chkpt_name = get_checkpoint_name(topology, wbits, abits, "minimize_bit_width") model.save(curr_chkpt_name) From 6ade140e684167100cce408454efbd9c2b4008c3 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 26 Sep 2024 14:20:04 +0100 Subject: [PATCH 31/51] [RoundThresh] Add change of the weight datatype to hw op threshold rounding --- src/finn/transformation/streamline/round_thresholds.py | 5 +++++ tests/end2end/test_end2end_mobilenet_v1.py | 1 + 2 files changed, 6 insertions(+) diff --git a/src/finn/transformation/streamline/round_thresholds.py b/src/finn/transformation/streamline/round_thresholds.py index 907f127896..ee6a31e3dc 100644 --- a/src/finn/transformation/streamline/round_thresholds.py +++ b/src/finn/transformation/streamline/round_thresholds.py @@ -29,6 +29,7 @@ import numpy as np from qonnx.core.modelwrapper import ModelWrapper +from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.base import Transformation from qonnx.transformation.infer_datatypes import InferDataTypes @@ -69,6 +70,10 @@ def apply(self, model: ModelWrapper): # noqa # The rounded and clipped thresholds now fit into the input data # type model.set_tensor_datatype(node.input[1], dtype) + # If hw op we need to set the weight data type attribute as well + if op_type.startswith("Thresholding"): + inst = getCustomOp(node) + inst.set_nodeattr("weightDataType", dtype.name) # ones if np.any(new_thresholds != thresholds): # Track the graph has been modified to inform the transform diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py index 01d995c147..4c52277970 100644 --- a/tests/end2end/test_end2end_mobilenet_v1.py +++ b/tests/end2end/test_end2end_mobilenet_v1.py @@ -353,6 +353,7 @@ def test_end2end_mobilenet_minimize_bit_width(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_folded.onnx") model = model.transform(MinimizeAccumulatorWidth()) model = model.transform(MinimizeWeightBitWidth()) + model = model.transform(RoundAndClipThresholds()) model.save(build_dir + "/end2end_mobilenet_minimize_bitwidth.onnx") From db353f4fda97df13c593c0a6733e1e3aee9c3ecc Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 27 Sep 2024 15:36:10 +0100 Subject: [PATCH 32/51] [RoundThresh] Allow for range + 1 --- .../streamline/round_thresholds.py | 17 ++++++++++++----- .../test_fpgadataflow_thresholding.py | 11 +++++++---- .../streamline/test_round_thresholds.py | 16 ++++++++++++++-- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/finn/transformation/streamline/round_thresholds.py 
b/src/finn/transformation/streamline/round_thresholds.py index ee6a31e3dc..312db404ac 100644 --- a/src/finn/transformation/streamline/round_thresholds.py +++ b/src/finn/transformation/streamline/round_thresholds.py @@ -28,6 +28,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np +from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.base import Transformation @@ -62,18 +63,24 @@ def apply(self, model: ModelWrapper): # noqa # introduce extra inaccuracies due to large integers not being # exactly representable in floating-point representation. # See for example: np.ceil(np.float32(16777217)) == 16777216 - new_thresholds = np.clip(np.ceil(thresholds), dtype.min(), dtype.max()) + new_thresholds = np.clip(np.ceil(thresholds), dtype.min(), dtype.max() + 1) # Convert back to the preferred float32 container type new_thresholds = new_thresholds.astype(np.float32) # Insert the rounded and clipped thresholds back into the model model.set_initializer(node.input[1], new_thresholds) - # The rounded and clipped thresholds now fit into the input data - # type - model.set_tensor_datatype(node.input[1], dtype) + # The rounded and clipped thresholds now fit into a data type + # that is one bit bigger than the input datatype + # Determine new max_value + max_val = dtype.max() + 1 + if not dtype.signed(): + tdt = DataType.get_smallest_possible(max_val) + else: + tdt = DataType.get_smallest_possible(-(max_val) - 1) + model.set_tensor_datatype(node.input[1], tdt) # If hw op we need to set the weight data type attribute as well if op_type.startswith("Thresholding"): inst = getCustomOp(node) - inst.set_nodeattr("weightDataType", dtype.name) + inst.set_nodeattr("weightDataType", tdt.name) # ones if np.any(new_thresholds != thresholds): # Track the graph has been modified to inform the transform diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py index fe7ba3d9fb..2079fe7fc5 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py @@ -49,6 +49,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 @@ -133,10 +134,8 @@ def make_single_multithresholding_modelwrapper( @pytest.mark.parametrize( "idt_tdt_cfg", [ - (DataType["INT8"], DataType["INT8"]), - (DataType["INT8"], DataType["INT9"]), - (DataType["UINT5"], DataType["UINT5"]), - (DataType["UINT5"], DataType["UINT6"]), + (DataType["INT8"], DataType["INT25"]), + (DataType["UINT5"], DataType["UINT8"]), ], ) @pytest.mark.parametrize("fold", [-1, 1, 2]) @@ -145,6 +144,7 @@ def make_single_multithresholding_modelwrapper( @pytest.mark.parametrize("impl_style", ["hls", "rtl"]) @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.parametrize("mem_mode", ["internal_embedded", "internal_decoupled"]) +@pytest.mark.parametrize("round_thresh", [True, False]) @pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow @@ -159,6 +159,7 @@ def test_fpgadataflow_thresholding( impl_style, exec_mode, mem_mode, + round_thresh, ): # the mem_mode parameter can only be 
used for the hls thresholding # so the test will only be executed once for impl_style=rtl and once skipped @@ -234,6 +235,8 @@ def test_fpgadataflow_thresholding( node = model.get_nodes_by_op_type(model.graph.node[0].op_type)[0] inst = getCustomOp(node) inst.set_nodeattr("PE", pe) + if round_thresh is True: + model = model.transform(RoundAndClipThresholds()) model = model.transform(GiveUniqueNodeNames()) if impl_style == "hls": diff --git a/tests/transformation/streamline/test_round_thresholds.py b/tests/transformation/streamline/test_round_thresholds.py index 7e2d39176e..6de82e6750 100644 --- a/tests/transformation/streamline/test_round_thresholds.py +++ b/tests/transformation/streamline/test_round_thresholds.py @@ -96,6 +96,7 @@ 256, ], ) +@pytest.mark.streamline def test_round_and_clip_thresholds_ints(i_dtype, o_dtype, n_elems): i_dtype = DataType[i_dtype] t_dtype = DataType["INT25"] # Note: Matches configuration above @@ -106,6 +107,7 @@ def test_round_and_clip_thresholds_ints(i_dtype, o_dtype, n_elems): inputs=["inp", "thresholds"], outputs=["out"], out_dtype=str(o_dtype), + out_bias=float(o_dtype.min()), ) n_thresholds = o_dtype.get_num_possible_values() - 1 inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, n_elems]) @@ -117,6 +119,7 @@ def test_round_and_clip_thresholds_ints(i_dtype, o_dtype, n_elems): model = ModelWrapper(helper.make_model(graph)) inp = gen_finn_dt_tensor(i_dtype, [1, n_elems]) + inp[0][0] = i_dtype.max() thresholds = np.sort(gen_finn_dt_tensor(t_dtype, [n_elems, n_thresholds])) model.set_tensor_datatype("inp", i_dtype) # noqa: Duplicate model execution model.set_tensor_datatype("thresholds", t_dtype) @@ -131,7 +134,11 @@ def test_round_and_clip_thresholds_ints(i_dtype, o_dtype, n_elems): # After this transformation, the thresholds and output data type should be # inferred correctly - assert model.get_tensor_datatype("thresholds") == i_dtype + if not i_dtype.signed(): + new_tdt = DataType.get_smallest_possible(i_dtype.max() + 1) + else: + new_tdt = DataType.get_smallest_possible(-(i_dtype.max() + 1) - 1) + assert model.get_tensor_datatype("thresholds") == new_tdt assert model.get_tensor_datatype("out") == o_dtype # After this transformation, the container type used to store the thresholds @@ -203,6 +210,7 @@ def test_round_and_clip_thresholds_ints(i_dtype, o_dtype, n_elems): 256, ], ) +@pytest.mark.streamline def test_round_and_clip_thresholds_floats(i_dtype, o_dtype, n_elems): i_dtype = DataType[i_dtype] t_dtype = DataType["FLOAT32"] @@ -244,7 +252,11 @@ def test_round_and_clip_thresholds_floats(i_dtype, o_dtype, n_elems): model = model.transform(RoundAndClipThresholds()) - assert model.get_tensor_datatype("thresholds") == i_dtype + if not i_dtype.signed(): + new_tdt = DataType.get_smallest_possible(i_dtype.max() + 1) + else: + new_tdt = DataType.get_smallest_possible(-(i_dtype.max() + 1) - 1) + assert model.get_tensor_datatype("thresholds") == new_tdt assert model.get_tensor_datatype("out") == o_dtype # After this transformation, the container type used to store the thresholds From b250047d444dfdc129bd667ce790c9c7982f2b39 Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 11 Oct 2024 09:47:01 +0100 Subject: [PATCH 33/51] [tutorial] Update folding config to new custom operator structure --- tutorials/fpga_flow/folding_config.json | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tutorials/fpga_flow/folding_config.json b/tutorials/fpga_flow/folding_config.json index 642200d02b..bf94f8058d 100644 --- 
a/tutorials/fpga_flow/folding_config.json
+++ b/tutorials/fpga_flow/folding_config.json
@@ -1,30 +1,29 @@
 {
   "Defaults": {},
-  "Thresholding_Batch_0": {
-    "PE": 49,
-    "ram_style": "block"
+  "Thresholding_rtl_0": {
+    "PE": 49
   },
-  "MatrixVectorActivation_0": {
+  "MVAU_hls_0": {
     "PE": 16,
     "SIMD": 49,
     "ram_style": "block"
   },
-  "MatrixVectorActivation_1": {
+  "MVAU_hls_1": {
     "PE": 8,
     "SIMD": 8,
     "ram_style": "auto"
   },
-  "MatrixVectorActivation_2": {
+  "MVAU_hls_2": {
     "PE": 8,
     "SIMD": 8,
     "ram_style": "auto"
   },
-  "MatrixVectorActivation_3": {
+  "MVAU_hls_3": {
     "PE": 10,
     "SIMD": 8,
     "ram_style": "distributed"
   },
-  "LabelSelect_Batch_0": {
+  "LabelSelect_hls_0": {
     "PE": 1
   }
 }

From b48147e0a6637659a8a7127dd0016edded998ed5 Mon Sep 17 00:00:00 2001
From: auphelia
Date: Fri, 11 Oct 2024 10:36:20 +0100
Subject: [PATCH 34/51] [tutorial] Format tutorial README

---
 tutorials/fpga_flow/README.md | 44 ++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/tutorials/fpga_flow/README.md b/tutorials/fpga_flow/README.md
index 2aaad0423b..71f2a2a625 100644
--- a/tutorials/fpga_flow/README.md
+++ b/tutorials/fpga_flow/README.md
@@ -25,20 +25,29 @@ This demo was created using Vivado 2022.1.
 Prior to running, ensure the following prerequisites have been met:
 - Install FINN and prerequisites.  The [Getting Started](https://finn.readthedocs.io/en/latest/getting_started.html#quickstart) section of the FINN documentation might be helpful for this.
 - Ensure you have the `FINN_XILINX_PATH` and `FINN_XILINX_VERSION` env variables set appropriately for your install.  For example:
-> export FINN_XILINX_PATH=/opt/Xilinx
-> export FINN_XILINX_VERSION=2022.1
+```shell
+export FINN_XILINX_PATH=/opt/Xilinx
+export FINN_XILINX_VERSION=2022.1
+```
+
 - Set the env variable for your `finn` install top directory (where you cloned the FINN compiler repo):
-> export FINN_ROOT=/home/foo/finn
+```shell
+export FINN_ROOT=/home/foo/finn
+```
 
 Then, change to `finn` install directory and invoke the build as follows:
-> cd ${FINN_ROOT}
-> ./run-docker.sh build_custom ${FINN_ROOT}/tutorials/fpga_flow/
+```shell
+cd ${FINN_ROOT}
+./run-docker.sh build_custom ${FINN_ROOT}/tutorials/fpga_flow/
+```
 
 Alternatively, since the tutorials folder is already part of the FINN compiler installation, you can invoke it from within the Docker container:
-> cd ${FINN_ROOT}
-> ./run-docker.sh
-> cd tutorials/fpga_flow
-> python build.py
+```shell
+cd ${FINN_ROOT}
+./run-docker.sh
+cd tutorials/fpga_flow
+python build.py
+```
 
 The build should finish in about 10 minutes, and the FINN docker will close on success.
 
@@ -59,12 +68,14 @@ The build should finish in about 10 minutes, and the FINN docker will close on s
 ### Examine the Stitched IP
 
 Navigate to the stitched IP project directory:
-
-> cd ${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/stitched_ip
+```shell
+cd ${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/stitched_ip
+```
 
 And, open the project:
-
-> vivado finn_vivado_stitch_proj.xpr
+```shell
+vivado finn_vivado_stitch_proj.xpr
+```
 
 Explore the IPI board design and note the interfaces.
 
@@ -89,9 +100,10 @@ them under `${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/sim`.  Let's ex
 the FINN compiler.  Used for launching the testbench simulation.
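If you prefer to run headless, the same generated script can be sourced in Vivado batch mode instead of the GUI. This is a minimal sketch, assuming the default script and output directory names used throughout this tutorial:

```shell
cd ${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/sim
vivado -mode batch -source make_sim_proj.tcl
```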
You can now launch the simulation as follows: - -> cd ${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/sim -> vivado -mode gui -source make_sim_proj.tcl +```shell +cd ${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/sim +vivado -mode gui -source make_sim_proj.tcl +``` The simulation should complete with: From f6acf7075b3af97719edd3705f1268f0d357e0fa Mon Sep 17 00:00:00 2001 From: Alexander Hornburg Date: Wed, 23 Oct 2024 17:42:26 +0100 Subject: [PATCH 35/51] [Infra] support passing arguments to build_custom flow --- run-docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run-docker.sh b/run-docker.sh index b1fe44eb0c..1358337a37 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -142,7 +142,7 @@ elif [ "$1" = "build_custom" ]; then DOCKER_INTERACTIVE="-it" #FINN_HOST_BUILD_DIR=$BUILD_DATAFLOW_DIR/build gecho "Running build_custom: $BUILD_CUSTOM_DIR/$FLOW_NAME.py" - DOCKER_CMD="python -mpdb -cc -cq $FLOW_NAME.py" + DOCKER_CMD="python -mpdb -cc -cq $FLOW_NAME.py ${@:4}" elif [ -z "$1" ]; then gecho "Running container only" DOCKER_CMD="bash" From 1d7636b8f8d841eda4e20b6cbd365b4a7257f24d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:41:32 +0000 Subject: [PATCH 36/51] Bump onnx from 1.13.0 to 1.17.0 Bumps [onnx](https://github.com/onnx/onnx) from 1.13.0 to 1.17.0. - [Release notes](https://github.com/onnx/onnx/releases) - [Changelog](https://github.com/onnx/onnx/blob/main/docs/Changelog-ml.md) - [Commits](https://github.com/onnx/onnx/compare/v1.13.0...v1.17.0) --- updated-dependencies: - dependency-name: onnx dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d4ca45cb37..85a0ca1175 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ gspread==3.6.0 importlib-resources==6.1.0 ipython==8.12.2 numpy==1.24.1 -onnx==1.13.0 +onnx==1.17.0 onnxoptimizer onnxruntime==1.16.1 pre-commit==3.3.2 From 14b68b7efa235089bf7e1d8d40416095bcb23e81 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 24 Oct 2024 14:29:36 +0100 Subject: [PATCH 37/51] [Infra] Add no-cache env var for run docker script --- run-docker.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/run-docker.sh b/run-docker.sh index 1358337a37..8bf6440d4f 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -102,6 +102,7 @@ SCRIPTPATH=$(dirname "$SCRIPT") : ${FINN_SINGULARITY=""} : ${FINN_SKIP_XRT_DOWNLOAD=""} : ${FINN_XRT_PATH=""} +: ${FINN_DOCKER_NO_CACHE="0"} DOCKER_INTERACTIVE="" @@ -190,12 +191,18 @@ if [ -d "$FINN_XRT_PATH" ];then export LOCAL_XRT=1 fi +if [ "$FINN_DOCKER_NO_CACHE" = "1" ]; then + export NO_CACHE_STRING="--no-cache" +else + export NO_CACHE_STRING="" +fi + # Build the FINN Docker image if [ "$FINN_DOCKER_PREBUILT" = "0" ] && [ -z "$FINN_SINGULARITY" ]; then # Need to ensure this is done within the finn/ root folder: OLD_PWD=$(pwd) cd $SCRIPTPATH - docker build -f docker/Dockerfile.finn --build-arg XRT_DEB_VERSION=$XRT_DEB_VERSION --build-arg SKIP_XRT=$FINN_SKIP_XRT_DOWNLOAD --build-arg LOCAL_XRT=$LOCAL_XRT --tag=$FINN_DOCKER_TAG $FINN_DOCKER_BUILD_EXTRA . + docker build -f docker/Dockerfile.finn --build-arg XRT_DEB_VERSION=$XRT_DEB_VERSION --build-arg SKIP_XRT=$FINN_SKIP_XRT_DOWNLOAD --build-arg LOCAL_XRT=$LOCAL_XRT --tag=$FINN_DOCKER_TAG $FINN_DOCKER_BUILD_EXTRA $NO_CACHE_STRING . 
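# Usage sketch (an illustrative assumption, not taken from the patch itself):
# once FINN_DOCKER_NO_CACHE is set, a clean image rebuild can be forced via
#   FINN_DOCKER_NO_CACHE=1 ./run-docker.sh build_custom ${FINN_ROOT}/tutorials/fpga_flow/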
cd $OLD_PWD fi From 72dcb87f510436d60ad0c370e6b90692ebf5b213 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 24 Oct 2024 14:41:37 +0100 Subject: [PATCH 38/51] [Infra] Re-use build extra env vars to enable no cache option --- run-docker.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/run-docker.sh b/run-docker.sh index 8bf6440d4f..69c998c467 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -192,9 +192,7 @@ if [ -d "$FINN_XRT_PATH" ];then fi if [ "$FINN_DOCKER_NO_CACHE" = "1" ]; then - export NO_CACHE_STRING="--no-cache" -else - export NO_CACHE_STRING="" + FINN_DOCKER_BUILD_EXTRA+="--no-cache" fi # Build the FINN Docker image @@ -202,7 +200,7 @@ if [ "$FINN_DOCKER_PREBUILT" = "0" ] && [ -z "$FINN_SINGULARITY" ]; then # Need to ensure this is done within the finn/ root folder: OLD_PWD=$(pwd) cd $SCRIPTPATH - docker build -f docker/Dockerfile.finn --build-arg XRT_DEB_VERSION=$XRT_DEB_VERSION --build-arg SKIP_XRT=$FINN_SKIP_XRT_DOWNLOAD --build-arg LOCAL_XRT=$LOCAL_XRT --tag=$FINN_DOCKER_TAG $FINN_DOCKER_BUILD_EXTRA $NO_CACHE_STRING . + docker build -f docker/Dockerfile.finn --build-arg XRT_DEB_VERSION=$XRT_DEB_VERSION --build-arg SKIP_XRT=$FINN_SKIP_XRT_DOWNLOAD --build-arg LOCAL_XRT=$LOCAL_XRT --tag=$FINN_DOCKER_TAG $FINN_DOCKER_BUILD_EXTRA . cd $OLD_PWD fi From f0aafa261e7a8f57891ba12cd1572e7d3062bc19 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 24 Oct 2024 15:19:55 +0100 Subject: [PATCH 39/51] [Infra] Add space to no cache var to allow for future extension --- run-docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run-docker.sh b/run-docker.sh index 69c998c467..b59af88eb7 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -192,7 +192,7 @@ if [ -d "$FINN_XRT_PATH" ];then fi if [ "$FINN_DOCKER_NO_CACHE" = "1" ]; then - FINN_DOCKER_BUILD_EXTRA+="--no-cache" + FINN_DOCKER_BUILD_EXTRA+="--no-cache " fi # Build the FINN Docker image From a9f1898deccb74a4f8e38717c5bef00e46c9f70f Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Fri, 1 Nov 2024 11:35:04 +0000 Subject: [PATCH 40/51] Use Vivado tclstore from install instead of home --- run-docker.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/run-docker.sh b/run-docker.sh index b59af88eb7..ec55299f6c 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -231,6 +231,9 @@ DOCKER_EXEC+="-e NUM_DEFAULT_WORKERS=$NUM_DEFAULT_WORKERS " # Workaround for FlexLM issue, see: # https://community.flexera.com/t5/InstallAnywhere-Forum/Issues-when-running-Xilinx-tools-or-Other-vendor-tools-in-docker/m-p/245820#M10647 DOCKER_EXEC+="-e LD_PRELOAD=/lib/x86_64-linux-gnu/libudev.so.1 " +# Workaround for running multiple Vivado instances simultaneously, see: +# https://adaptivesupport.amd.com/s/article/63253?language=en_US +DOCKER_EXEC+="-e XILINX_LOCAL_USER_DATA=no " if [ "$FINN_DOCKER_RUN_AS_ROOT" = "0" ] && [ -z "$FINN_SINGULARITY" ];then DOCKER_EXEC+="-v /etc/group:/etc/group:ro " DOCKER_EXEC+="-v /etc/passwd:/etc/passwd:ro " From 016c425a44468f419eab97f8e9cde05072a49e26 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 16:44:54 +0000 Subject: [PATCH 41/51] [Feature] Timeout template added --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 14 ++++++ src/finn/custom_op/fpgadataflow/templates.py | 45 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index d8210fd684..c03a9029db 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ 
b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -474,3 +474,17 @@ def get_ap_int_max_w(self): ret = max([instream, outstream]) assert ret <= 8191, "AP_INT_MAX_W=%d is larger than allowed maximum of 8191" % ret return ret + + def timeout_value(self): + """Set timeout value for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_VALUE$"] = ["100"] + + def timeout_condition(self): + """Set timeout condition for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_CONDITION$"] = ["out_{}.empty()".format(self.hls_sname())] + + def timeout_read_stream(self): + """Set reading output stream procedure for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_READ_STREAM$"] = [ + "debug_out_{} << out_{}.read();".format(self.hls_sname(), self.hls_sname()) + ] diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 3d89a0ab23..7ef74118ec 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -58,6 +58,51 @@ """ +# template for single node execution with timeout (for single clock hls operations) +docompute_template_timeout = """ +#define AP_INT_MAX_W $AP_INT_MAX_W$ +#include "cnpy.h" +#include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" +#include +#include "bnn-library.h" + +// includes for network parameters +$GLOBALS$ + +// defines for network parameters +$DEFINES$ + +int main(){ +$PRAGMAS$ + +$STREAMDECLARATIONS$ + +$READNPYDATA$ + +unsigned timeout = 0; +while(timeout < $TIMEOUT_VALUE$){ + +$DOCOMPUTE$ + +if($TIMEOUT_CONDITION$){ +timeout++; +} + +else{ +$TIMEOUT_READ_STREAM$ +timeout = 0; +} +} + +$DATAOUTSTREAM$ + +$SAVEASCNPY$ + +} + +""" + # templates for single node ip generation # cpp file From fe69308ef293093239d4c8137f80e71375e9bfaf Mon Sep 17 00:00:00 2001 From: mdaniowi Date: Fri, 20 Sep 2024 16:02:40 +0100 Subject: [PATCH 42/51] [Feature] npy2vectorstream.hpp include added to docompute_template --- src/finn/custom_op/fpgadataflow/templates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 7ef74118ec..d2100a7516 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -32,6 +32,7 @@ #define AP_INT_MAX_W $AP_INT_MAX_W$ #include "cnpy.h" #include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" #include #include "bnn-library.h" From f21da72ba791ec1a9423f7761b8806843dc417a0 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 5 Dec 2024 14:42:22 +0000 Subject: [PATCH 43/51] First draft of enabling both cpp interfaces in cppsim --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 55 +++++++++++++------ 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index c03a9029db..98b1dc80c9 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -54,6 +54,8 @@ def get_nodeattr_types(self): "code_gen_dir_cppsim": ("s", False, ""), "executable_path": ("s", False, ""), "res_hls": ("s", False, ""), + # temporary node attribute to keep track of interface style of hls ops + "cpp_interface": ("s", False, "packed", {"packed", "hls_vector"}), } def get_all_verilog_paths(self): @@ -206,7 +208,13 @@ def code_generation_cppsim(self, model): self.dataoutstrm() self.save_as_npy() - template = 
templates.docompute_template + if self.get_nodeattr("cpp_interface") == "hls_vector": + self.timeout_value() + self.timeout_condition() + self.timeout_read_stream() + template = templates.docompute_template_timeout + else: + template = templates.docompute_template for key in self.code_gen_dict: # transform list into long string separated by '\n' @@ -422,27 +430,42 @@ def dataoutstrm(self): if dtype == DataType["BIPOLAR"]: # use binary for bipolar storage dtype = DataType["BINARY"] - elem_bits = dtype.bitwidth() - packed_bits = self.get_outstream_width() - packed_hls_type = "ap_uint<%d>" % packed_bits elem_hls_type = dtype.get_hls_datatype_str() npy_type = "float" npy_out = "%s/output.npy" % code_gen_dir oshape = self.get_folded_output_shape() oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") - self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' - % ( - packed_hls_type, - elem_hls_type, - elem_bits, - npy_type, - self.hls_sname(), - oshape_cpp_str, - npy_out, - ) - ] + cpp_interface = self.get_nodeattr("cpp_interface") + + if cpp_interface == "packed": + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + self.hls_sname(), + oshape_cpp_str, + npy_out, + ) + ] + else: + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'vectorstream2npy<%s, %s, SIMD>(debug_out_%s, %s, "%s");' + % ( + elem_hls_type, + npy_type, + self.hls_sname(), + oshape_cpp_str, + npy_out, + ) + ] def save_as_npy(self): """Function to generate the commands for saving data in .npy file in c++""" From 0b5e80e2c0d41051f3969d49cda2ffae74470df8 Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 9 Dec 2024 14:14:54 +0000 Subject: [PATCH 44/51] [HLSBackend] Update hls vector cppsim methods --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 49 +++++++++++++------ 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index 98b1dc80c9..d8397c67fd 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -379,24 +379,40 @@ def read_npy_data(self): if dtype == DataType["BIPOLAR"]: # use binary for bipolar storage dtype = DataType["BINARY"] - elem_bits = dtype.bitwidth() - packed_bits = self.get_instream_width() - packed_hls_type = "ap_uint<%d>" % packed_bits elem_hls_type = dtype.get_hls_datatype_str() npy_type = "float" npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] - self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' - % ( - packed_hls_type, - elem_hls_type, - elem_bits, - npy_type, - npy_in, - self.hls_sname(), + + cpp_interface = self.get_nodeattr("cpp_interface") + + if cpp_interface == "packed": + elem_bits = dtype.bitwidth() + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) + ) + else: + folded_shape = self.get_folded_input_shape() + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2vectorstream<%s, %s, %d>("%s", in0_%s, false);' + % ( + elem_hls_type, + npy_type, + 
folded_shape[-1], + npy_in, + self.hls_sname(), + ) ) - ) def strm_decl(self): """Function to generate the commands for the stream declaration in c++, @@ -456,12 +472,13 @@ def dataoutstrm(self): ) ] else: + folded_shape = self.get_folded_output_shape() self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'vectorstream2npy<%s, %s, SIMD>(debug_out_%s, %s, "%s");' + 'vectorstream2npy<%s, %s, %d>(strm, %s, "%s");' % ( elem_hls_type, npy_type, - self.hls_sname(), + folded_shape[-1], oshape_cpp_str, npy_out, ) @@ -509,5 +526,5 @@ def timeout_condition(self): def timeout_read_stream(self): """Set reading output stream procedure for HLS functions defined for one clock cycle""" self.code_gen_dict["$TIMEOUT_READ_STREAM$"] = [ - "debug_out_{} << out_{}.read();".format(self.hls_sname(), self.hls_sname()) + "strm << out_{}.read();".format(self.hls_sname()) ] From 95bc8a66eb721e8924a820b2b2994792bdbda457 Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 9 Dec 2024 14:36:11 +0000 Subject: [PATCH 45/51] [HLSBackend] Increase time out value --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index d8397c67fd..4677960ea8 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -517,7 +517,7 @@ def get_ap_int_max_w(self): def timeout_value(self): """Set timeout value for HLS functions defined for one clock cycle""" - self.code_gen_dict["$TIMEOUT_VALUE$"] = ["100"] + self.code_gen_dict["$TIMEOUT_VALUE$"] = ["1000"] def timeout_condition(self): """Set timeout condition for HLS functions defined for one clock cycle""" From 7d0d3a9592169faf092de75e87985e8598d88334 Mon Sep 17 00:00:00 2001 From: Joshua Monson Date: Thu, 12 Dec 2024 00:24:58 +0000 Subject: [PATCH 46/51] switch blocking to non-blocking to blocking assignments in combination logic --- finn-rtllib/fifo/hdl/Q_srl.v | 144 +++++++++++++++++------------------ 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/finn-rtllib/fifo/hdl/Q_srl.v b/finn-rtllib/fifo/hdl/Q_srl.v index d1ce33c41f..0b01973163 100644 --- a/finn-rtllib/fifo/hdl/Q_srl.v +++ b/finn-rtllib/fifo/hdl/Q_srl.v @@ -184,58 +184,58 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount); end // always @ (posedge clock or negedge reset) always @* begin // - combi always - srlo_ <= 'bx; - shift_en_o_ <= 1'bx; - shift_en_ <= 1'bx; - addr_ <= 'bx; - state_ <= 2'bx; + srlo_ = 'bx; + shift_en_o_ = 1'bx; + shift_en_ = 1'bx; + addr_ = 'bx; + state_ = 2'bx; case (state) state_empty: begin // - (empty, will not produce) if (i_v) begin // - empty & i_v => consume - srlo_ <= i_d; - shift_en_o_ <= 1; - shift_en_ <= 1'bx; - addr_ <= 0; - state_ <= state_one; + srlo_ = i_d; + shift_en_o_ = 1; + shift_en_ = 1'bx; + addr_ = 0; + state_ = state_one; end else begin // - empty & !i_v => idle - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1'bx; - addr_ <= 0; - state_ <= state_empty; + srlo_ = 'bx; + shift_en_o_ = 0; + shift_en_ = 1'bx; + addr_ = 0; + state_ = state_empty; end end state_one: begin // - (contains one) if (i_v && o_b) begin // - one & i_v & o_b => consume - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1; - addr_ <= 0; - state_ <= state_more; + srlo_ = 'bx; + shift_en_o_ = 0; + shift_en_ = 1; + addr_ = 0; + state_ = state_more; end else if (i_v && !o_b) begin // - one & i_v & !o_b => cons+prod - srlo_ <= i_d; - shift_en_o_ <= 1; - shift_en_ <= 1; - addr_ <= 
0; - state_ <= state_one; + srlo_ = i_d; + shift_en_o_ = 1; + shift_en_ = 1; + addr_ = 0; + state_ = state_one; end else if (!i_v && o_b) begin // - one & !i_v & o_b => idle - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1'bx; - addr_ <= 0; - state_ <= state_one; + srlo_ = 'bx; + shift_en_o_ = 0; + shift_en_ = 1'bx; + addr_ = 0; + state_ = state_one; end else if (!i_v && !o_b) begin // - one & !i_v & !o_b => produce - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1'bx; - addr_ <= 0; - state_ <= state_empty; + srlo_ = 'bx; + shift_en_o_ = 0; + shift_en_ = 1'bx; + addr_ = 0; + state_ = state_empty; end end // case: state_one @@ -244,60 +244,60 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount); // - (full, will not consume) // - (full here if depth==2) if (o_b) begin // - full & o_b => idle - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 0; - addr_ <= addr; - state_ <= state_more; + srlo_ = 'bx; + shift_en_o_ = 0; + shift_en_ = 0; + addr_ = addr; + state_ = state_more; end else begin // - full & !o_b => produce - srlo_ <= srl[addr]; - shift_en_o_ <= 1; - shift_en_ <= 0; -// addr_ <= addr-1; -// state_ <= state_more; - addr_ <= addr_zero_ ? 0 : addr-1; - state_ <= addr_zero_ ? state_one : state_more; + srlo_ = srl[addr]; + shift_en_o_ = 1; + shift_en_ = 0; +// addr_ = addr-1; +// state_ = state_more; + addr_ = addr_zero_ ? 0 : addr-1; + state_ = addr_zero_ ? state_one : state_more; end end else begin // - (mid: neither empty nor full) if (i_v && o_b) begin // - mid & i_v & o_b => consume - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1; - addr_ <= addr+1; - state_ <= state_more; + srlo_ = 'bx; + shift_en_o_ = 0; + shift_en_ = 1; + addr_ = addr+1; + state_ = state_more; end else if (i_v && !o_b) begin // - mid & i_v & !o_b => cons+prod - srlo_ <= srl[addr]; - shift_en_o_ <= 1; - shift_en_ <= 1; - addr_ <= addr; - state_ <= state_more; + srlo_ = srl[addr]; + shift_en_o_ = 1; + shift_en_ = 1; + addr_ = addr; + state_ = state_more; end else if (!i_v && o_b) begin // - mid & !i_v & o_b => idle - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 0; - addr_ <= addr; - state_ <= state_more; + srlo_ = 'bx; + shift_en_o_ = 0; + shift_en_ = 0; + addr_ = addr; + state_ = state_more; end else if (!i_v && !o_b) begin // - mid & !i_v & !o_b => produce - srlo_ <= srl[addr]; - shift_en_o_ <= 1; - shift_en_ <= 0; - addr_ <= addr_zero_ ? 0 : addr-1; - state_ <= addr_zero_ ? state_one : state_more; + srlo_ = srl[addr]; + shift_en_o_ = 1; + shift_en_ = 0; + addr_ = addr_zero_ ? 0 : addr-1; + state_ = addr_zero_ ? 
state_one : state_more; end end // else: !if(addr_full) end // case: state_more default: begin - srlo_ <= 'bx; - shift_en_o_ <= 1'bx; - shift_en_ <= 1'bx; - addr_ <= 'bx; - state_ <= 2'bx; + srlo_ = 'bx; + shift_en_o_ = 1'bx; + shift_en_ = 1'bx; + addr_ = 'bx; + state_ = 2'bx; end // case: default endcase // case(state) From abb96d6fd9edb6699f59a626d2bd4675d0eb17d3 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Tue, 7 Jan 2025 17:48:06 +0000 Subject: [PATCH 47/51] Move build dir creation into test --- tests/brevitas/test_brevitas_fc.py | 3 +-- tests/transformation/streamline/test_streamline_cnv.py | 3 +-- tests/transformation/streamline/test_streamline_fc.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/brevitas/test_brevitas_fc.py b/tests/brevitas/test_brevitas_fc.py index 842d099f57..a7a73a5ed4 100644 --- a/tests/brevitas/test_brevitas_fc.py +++ b/tests/brevitas/test_brevitas_fc.py @@ -45,8 +45,6 @@ from finn.util.basic import make_build_dir from finn.util.test import get_test_model_trained -export_onnx_path = make_build_dir("test_brevitas_fc_") - @pytest.mark.brevitas_export # act bits @@ -61,6 +59,7 @@ def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits): if wbits > abits: pytest.skip("No wbits > abits cases at the moment") nname = "%s_%dW%dA" % (size, wbits, abits) + export_onnx_path = make_build_dir("test_brevitas_fc_") finn_onnx = export_onnx_path + "/%s.onnx" % nname fc = get_test_model_trained(size, wbits, abits) ishape = (1, 1, 28, 28) diff --git a/tests/transformation/streamline/test_streamline_cnv.py b/tests/transformation/streamline/test_streamline_cnv.py index 8a91a49278..9e206c843a 100644 --- a/tests/transformation/streamline/test_streamline_cnv.py +++ b/tests/transformation/streamline/test_streamline_cnv.py @@ -50,8 +50,6 @@ from finn.util.basic import make_build_dir from finn.util.test import get_test_model_trained -export_onnx_path = make_build_dir("test_streamline_cnv_") - @pytest.mark.streamline # act bits @@ -64,6 +62,7 @@ def test_streamline_cnv(size, wbits, abits): if wbits > abits: pytest.skip("No wbits > abits cases at the moment") nname = "%s_%dW%dA" % (size, wbits, abits) + export_onnx_path = make_build_dir("test_streamline_cnv_") finn_onnx = export_onnx_path + "/%s.onnx" % nname fc = get_test_model_trained(size, wbits, abits) export_qonnx(fc, torch.randn(1, 3, 32, 32), finn_onnx) diff --git a/tests/transformation/streamline/test_streamline_fc.py b/tests/transformation/streamline/test_streamline_fc.py index edc4a96fe2..9ce2f2ab65 100644 --- a/tests/transformation/streamline/test_streamline_fc.py +++ b/tests/transformation/streamline/test_streamline_fc.py @@ -52,8 +52,6 @@ from finn.util.basic import make_build_dir from finn.util.test import get_test_model_trained -export_onnx_path = make_build_dir("test_streamline_fc_") - @pytest.mark.streamline # act bits @@ -68,6 +66,7 @@ def test_streamline_fc(size, wbits, abits): if wbits > abits: pytest.skip("No wbits > abits cases at the moment") nname = "%s_%dW%dA" % (size, wbits, abits) + export_onnx_path = make_build_dir("test_streamline_fc_") finn_onnx = export_onnx_path + "/%s.onnx" % nname fc = get_test_model_trained(size, wbits, abits) export_qonnx(fc, torch.randn(1, 1, 28, 28), finn_onnx) From 28255c31d649e0d323b98a48a1e266adadecaf5e Mon Sep 17 00:00:00 2001 From: jsmonson Date: Fri, 10 Jan 2025 11:30:43 -0700 Subject: [PATCH 48/51] Add V80 to Alveo part_map --- src/finn/util/basic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finn/util/basic.py 

From 28255c31d649e0d323b98a48a1e266adadecaf5e Mon Sep 17 00:00:00 2001
From: jsmonson
Date: Fri, 10 Jan 2025 11:30:43 -0700
Subject: [PATCH 48/51] Add V80 to Alveo part_map

---
 src/finn/util/basic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 0cb029a888..3f5f3960e4 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -69,6 +69,7 @@
 alveo_part_map["U250"] = "xcu250-figd2104-2L-e"
 alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
 alveo_part_map["U55C"] = "xcu55c-fsvh2892-2L-e"
+alveo_part_map["V80"] = "xcv80-lsva4737-2MHP-e-s"

 alveo_default_platform = dict()
 alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_5_202210_1"

From d2e89dff4f601e948798f36fc759b49936ebd5c5 Mon Sep 17 00:00:00 2001
From: jsmonson
Date: Mon, 13 Jan 2025 09:27:11 -0700
Subject: [PATCH 49/51] add V80 similar to other Versal Parts

---
 src/finn/util/basic.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 3f5f3960e4..870f9f6fa6 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -69,7 +69,6 @@
 alveo_part_map["U250"] = "xcu250-figd2104-2L-e"
 alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
 alveo_part_map["U55C"] = "xcu55c-fsvh2892-2L-e"
-alveo_part_map["V80"] = "xcv80-lsva4737-2MHP-e-s"

 alveo_default_platform = dict()
 alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_5_202210_1"
@@ -82,7 +81,7 @@
 part_map = {**pynq_part_map, **alveo_part_map}
 part_map["VEK280"] = "xcve2802-vsvh1760-2MP-e-S"
 part_map["VCK190"] = "xcvc1902-vsva2197-2MP-e-S"
-
+part_map["V80"] = "xcv80-lsva4737-2MHP-e-s"

 def get_rtlsim_trace_depth():
     """Return the trace depth for rtlsim via PyVerilator. Controllable
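
Patches 48 and 49 net out to registering the V80 in the combined part_map rather than in alveo_part_map: the board carries a Versal device (xcv80), so listing it alongside the other Versal entries (VEK280, VCK190) keeps it out of Alveo-specific code paths such as the alveo_default_platform lookup. A small usage sketch of the resulting maps, assuming only the entries visible in the diffs above:

from finn.util.basic import alveo_part_map, part_map

# The combined map resolves the new board to its Versal device string...
assert part_map["V80"] == "xcv80-lsva4737-2MHP-e-s"
# ...while Alveo-only logic, keyed on alveo_part_map, never sees it.
assert "V80" not in alveo_part_map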

From ba0261fd2d431568917f1ece7f8569da2daf14ec Mon Sep 17 00:00:00 2001
From: jsmonson
Date: Mon, 13 Jan 2025 09:32:00 -0700
Subject: [PATCH 50/51] add corrected spacing

---
 src/finn/util/basic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 870f9f6fa6..5eb72194ea 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -83,6 +83,7 @@
 part_map["VCK190"] = "xcvc1902-vsva2197-2MP-e-S"
 part_map["V80"] = "xcv80-lsva4737-2MHP-e-s"

+
 def get_rtlsim_trace_depth():
     """Return the trace depth for rtlsim via PyVerilator. Controllable
     via the RTLSIM_TRACE_DEPTH environment variable. If the env.var. is

From 65a83b2f7943219acbf0f5bc427da46034cdadab Mon Sep 17 00:00:00 2001
From: auphelia
Date: Tue, 14 Jan 2025 16:32:42 +0000
Subject: [PATCH 51/51] [Builder] Relax requirements to derive fpga part for specific board

---
 src/finn/builder/build_dataflow_config.py | 11 +++++------
 tests/fpgadataflow/test_fifosizing.py     |  1 -
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 5d69802337..d6437a2e5c 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -35,7 +35,7 @@
 from typing import Any, List, Optional

 from finn.transformation.fpgadataflow.vitis_build import VitisOptStrategy
-from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map
+from finn.util.basic import alveo_default_platform, part_map


 class AutoFIFOSizingMethod(str, Enum):
@@ -370,11 +370,10 @@ def _resolve_driver_platform(self):
     def _resolve_fpga_part(self):
         if self.fpga_part is None:
             # lookup from part map if not specified
-            if self.shell_flow_type == ShellFlowType.VIVADO_ZYNQ:
-                return pynq_part_map[self.board]
-            elif self.shell_flow_type == ShellFlowType.VITIS_ALVEO:
-                return alveo_part_map[self.board]
-            else:
+            try:
+                fpga_part = part_map[self.board]
+                return fpga_part
+            except KeyError:
                 raise Exception("Couldn't resolve fpga_part for " + self.board)
         else:
             # return as-is when explicitly specified
diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py
index 338204c0c7..e5f9659665 100644
--- a/tests/fpgadataflow/test_fifosizing.py
+++ b/tests/fpgadataflow/test_fifosizing.py
@@ -70,7 +70,6 @@ def test_fifosizing_linear(method, topology):
         synth_clk_period_ns=10.0,
         board="Pynq-Z1",
         rtlsim_batch_size=100 if topology == "tfc" else 2,
-        shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
         generate_outputs=[
             build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
             build_cfg.DataflowOutputType.STITCHED_IP,
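
The builder change in the final patch makes board-to-part resolution independent of shell_flow_type: instead of branching into pynq_part_map or alveo_part_map, _resolve_fpga_part consults the single merged part_map, so any known board (including the newly added V80) resolves even when no shell flow is configured. That is also why test_fifosizing.py can drop its explicit shell_flow_type argument. A standalone sketch of the new rule; the free function and the one-entry map are illustrative stand-ins for DataflowBuildConfig._resolve_fpga_part and finn.util.basic.part_map, and the Pynq-Z1 part string matches FINN's pynq_part_map:

from typing import Dict, Optional


def resolve_fpga_part(board: str, fpga_part: Optional[str], part_map: Dict[str, str]) -> str:
    # An explicitly specified part always wins; otherwise fall back to one
    # unified board->part lookup instead of branching on the shell flow type.
    if fpga_part is not None:
        return fpga_part
    try:
        return part_map[board]
    except KeyError:
        raise Exception("Couldn't resolve fpga_part for " + board)


# Example: resolves via the map when no part is given explicitly.
print(resolve_fpga_part("Pynq-Z1", None, {"Pynq-Z1": "xc7z020clg400-1"}))
# -> xc7z020clg400-1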