From 2b20386e6fde934eb6d636a10d41aa8a8ab5620c Mon Sep 17 00:00:00 2001
From: Christoph Berganski
Date: Mon, 20 Nov 2023 15:10:03 +0100
Subject: [PATCH 1/7] Make quantized activation handlers data layout aware

This is probably still rather sketchy, but at least it tries to check
the data layout annotation. For now this seems to be enough to get the
thresholds of multi-head attention right, if qonnx properly annotates
the 3D layouts.
---
 .../qonnx/qonnx_activation_handlers.py        | 59 +++++++++++++++----
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
index 323e391df4..451ba52c29 100644
--- a/src/finn/transformation/qonnx/qonnx_activation_handlers.py
+++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
@@ -25,8 +25,8 @@
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
 import numpy as np
+import warnings
 from abc import ABC, abstractmethod
 from onnx import TensorProto, helper
 from qonnx.core.modelwrapper import ModelWrapper
@@ -70,7 +70,7 @@ def _check_compatibility(self):
     @abstractmethod
     def _calculate_act_bias(self):
         """Calculate the activation bias,
-        which is introduced as an Add node behind the MultiTrheshold node.
+        which is introduced as an Add node behind the MultiThreshold node.
         """
         raise NotImplementedError()
@@ -82,7 +82,7 @@ def _calculate_thresholds(self):
     @abstractmethod
     def _calculate_act_scale(self):
         """Calculate the activation scale,
-        which is indroduced as a Mul node behind the Add node
+        which is introduced as a Mul node behind the Add node
         for the activation bias.
         """
         raise NotImplementedError()
@@ -157,7 +157,7 @@ def replace_quant_node(self):
         # Set scale and bias
         # If these values are scalar then they can be set as attributes
         # of the MultiThreshold node, if not they get inserted as adder and mul nodes
-        # behind the MultiTrheshold nodes.
+        # behind the MultiThreshold nodes.
         bias_scalar = adder_bias.shape == (1,) or len(adder_bias.shape) == 0
         scale_scalar = mul_scale.shape == (1,) or len(mul_scale.shape) == 0
         if scale_scalar and bias_scalar and self._q_node.op_type == "BipolarQuant":
@@ -355,7 +355,7 @@ def _calculate_thresholds(self):
         act_node = self._model.find_direct_predecessors(self._q_node)
         act_node = act_node[0]
         if act_node.op_type == "Relu":
-            # Calculate thersholds, see: https://github.com/Xilinx/brevitas/blob/
+            # Calculate thresholds, see: https://github.com/Xilinx/brevitas/blob/
             # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
             # onnx/finn/handler/act.py#L21
             num_distinct_values = 2**bit_width
@@ -395,8 +395,27 @@ def _calculate_thresholds(self):
                     else:
                         thresholds[c][t] = step / selu_scale

+        # First try to consider the tensor layout of the output for determining
+        # the number of output channels
+        layout = self._model.get_tensor_layout(self._q_node.output[0])
+        # If there is a layout annotation, use this to determine the index of
+        # the channel dimension
+        if layout is not None and "C" in layout:
+            # Lookup the index in list
+            cdim = layout.index("C")
+        # If no layout has been annotated or there is no channel dimension, fall
+        # back to the previous default assumption
+        else:
+            # Assume the channels to be in axis 1
+            cdim = 1
+            # Issue a warning to the user, so they are aware of this
+            warnings.warn(
+                f"No layout annotations for {self._q_node.output[0]}:"
+                f" Assuming channel dimension at index {cdim}"
+            )
+
         # ToDo: The index 1 needs to be changed to -1 for the channels last format
-        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1]
+        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[cdim]
         final_shape = (num_output_channels, num_thresholds)
         if thresholds.shape != final_shape:
             thresholds = np.broadcast_to(thresholds, final_shape)
@@ -417,12 +436,12 @@ def _remove_activation_node(self, multi_threshold_node):
         act_node = self._model.find_direct_predecessors(self._q_node)
         if act_node is None:
             raise RuntimeError(
-                "For handling of Relu activations a predecesor to " "the Quant node must exist."
+                "For handling of Relu activations a predecessor to " "the Quant node must exist."
             )
         act_node = act_node[0]
         if act_node.op_type not in self.valid_predecessor_op_types():
             raise RuntimeError(
-                "The predecesor of the Quant node must be Relu or Selu for handling "
+                "The predecessor of the Quant node must be Relu or Selu for handling "
                 "of activations."
             )
@@ -509,7 +528,7 @@ def _calculate_thresholds(self):
         else:
             raise RuntimeError("Got an unexpected quantizer node type")

-        # Calculate thersholds, see: https://github.com/Xilinx/brevitas/
+        # Calculate thresholds, see: https://github.com/Xilinx/brevitas/
         # blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
         # export/onnx/finn/handler/act.py#L76
         if bit_width == 1.0:
@@ -537,8 +556,28 @@ def _calculate_thresholds(self):
             for t in range(num_thresholds):
                 thresholds[c][t] = min_threshold[c] + step[c] * t

+        # First try to consider the tensor layout of the output for
+        # determining the number of output channels
+        layout = self._model.get_tensor_layout(self._q_node.output[0])
+        # If there is a layout annotation, use this to determine the index
+        # of the channel dimension
+        if layout is not None and "C" in layout:
+            # Lookup the index in list
+            cdim = layout.index("C")
+        # If no layout has been annotated or there is no channel dimension,
+        # fall back to the previous default assumption
+        else:
+            # Assume the channels to be in axis 1
+            cdim = 1
+            # Issue a warning to the user, so they are aware of this
+            warnings.warn(
+                f"No layout annotations for {self._q_node.output[0]}:"
+                f" Assuming channel dimension at index {cdim}"
+            )
+
         # ToDo: The index 1 needs to be changed to -1 for the channels last format
-        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1]
+        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[cdim]
+
         final_shape = (num_output_channels, num_thresholds)
         if thresholds.shape != final_shape:
             thresholds = np.broadcast_to(thresholds, final_shape)
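
The channel-lookup logic added by this patch is small enough to sketch in isolation. Below is a minimal, self-contained illustration of the fallback behaviour; channel_dim is a hypothetical helper, not part of the handler, and the layout lists mirror qonnx-style annotations:

    import warnings

    def channel_dim(layout, default=1):
        # Derive the channel axis from a layout annotation such as
        # ["N", "C", "H", "W"]; otherwise fall back to axis 1 (the
        # handler's previous hard-coded assumption) and warn.
        if layout is not None and "C" in layout:
            return layout.index("C")
        warnings.warn(f"No layout annotation: assuming channel dimension at index {default}")
        return default

    # A 3D attention-style tensor annotated as NWC has its channels at index 2:
    assert channel_dim(["N", "W", "C"]) == 2
    # Without an annotation the old default of axis 1 is kept:
    assert channel_dim(None) == 1
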
From 4a69267a274c14924cf59f0cfa900d4053d01490 Mon Sep 17 00:00:00 2001
From: Christoph Berganski
Date: Fri, 13 Sep 2024 16:33:51 +0200
Subject: [PATCH 2/7] Consider the data layout of the input tensor for the
 MultiThreshold

---
 .../qonnx/qonnx_activation_handlers.py        | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
index 451ba52c29..1158253aea 100644
--- a/src/finn/transformation/qonnx/qonnx_activation_handlers.py
+++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
@@ -139,6 +139,8 @@ def replace_quant_node(self):
         graph.value_info.append(thresh_tensor)
         model.set_initializer(thresh_tensor.name, thresholds)

+        data_layout = model.get_tensor_layout(n.input[0])
+
         # Insert MultiThreshold node
         outp_trans_node = helper.make_node(
             "MultiThreshold",
@@ -154,6 +156,11 @@ def replace_quant_node(self):
         mt_node = graph.node[running_node_index - 1]
         mt_inst = getCustomOp(mt_node)

+        # Inherit the data layout from the input tensor if available
+        if data_layout is not None:
+            # Convert list to string representation of the data layout
+            mt_inst.set_nodeattr("data_layout", "".join(data_layout))
+
         # Set scale and bias
         # If these values are scalar then they can be set as attributes
         # of the MultiThreshold node, if not they get inserted as adder and mul nodes
@@ -395,9 +402,9 @@ def _calculate_thresholds(self):
                     else:
                         thresholds[c][t] = step / selu_scale

-        # First try to consider the tensor layout of the output for determining
+        # First try to consider the tensor layout of the input for determining
         # the number of output channels
-        layout = self._model.get_tensor_layout(self._q_node.output[0])
+        layout = self._model.get_tensor_layout(self._q_node.input[0])
         # If there is a layout annotation, use this to determine the index of
         # the channel dimension
         if layout is not None and "C" in layout:
@@ -410,7 +417,7 @@ def _calculate_thresholds(self):
             cdim = 1
             # Issue a warning to the user, so they are aware of this
             warnings.warn(
-                f"No layout annotations for {self._q_node.output[0]}:"
+                f"No layout annotations for {self._q_node.input[0]}:"
                 f" Assuming channel dimension at index {cdim}"
             )

@@ -556,9 +563,9 @@ def _calculate_thresholds(self):
             for t in range(num_thresholds):
                 thresholds[c][t] = min_threshold[c] + step[c] * t

-        # First try to consider the tensor layout of the output for
+        # First try to consider the tensor layout of the input for
         # determining the number of output channels
-        layout = self._model.get_tensor_layout(self._q_node.output[0])
+        layout = self._model.get_tensor_layout(self._q_node.input[0])
         # If there is a layout annotation, use this to determine the index
         # of the channel dimension
         if layout is not None and "C" in layout:
@@ -571,7 +578,7 @@ def _calculate_thresholds(self):
             cdim = 1
             # Issue a warning to the user, so they are aware of this
             warnings.warn(
-                f"No layout annotations for {self._q_node.output[0]}:"
+                f"No layout annotations for {self._q_node.input[0]}:"
                 f" Assuming channel dimension at index {cdim}"
             )
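
The new data_layout node attribute stores the annotation in string form. A small sketch of the conversion done in replace_quant_node above, assuming a qonnx-style layout list (the concrete value here is illustrative):

    # ModelWrapper.get_tensor_layout returns a list of axis labels;
    # the MultiThreshold node attribute expects them joined into a string.
    data_layout = ["N", "W", "C"]  # e.g. for a 3D attention tensor
    if data_layout is not None:
        # Same conversion as mt_inst.set_nodeattr("data_layout", ...) above
        assert "".join(data_layout) == "NWC"
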
From 28255c31d649e0d323b98a48a1e266adadecaf5e Mon Sep 17 00:00:00 2001
From: jsmonson
Date: Fri, 10 Jan 2025 11:30:43 -0700
Subject: [PATCH 3/7] Add V80 to Alveo part_map

---
 src/finn/util/basic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 0cb029a888..3f5f3960e4 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -69,6 +69,7 @@
 alveo_part_map["U250"] = "xcu250-figd2104-2L-e"
 alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
 alveo_part_map["U55C"] = "xcu55c-fsvh2892-2L-e"
+alveo_part_map["V80"] = "xcv80-lsva4737-2MHP-e-s"

 alveo_default_platform = dict()
 alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_5_202210_1"

From d2e89dff4f601e948798f36fc759b49936ebd5c5 Mon Sep 17 00:00:00 2001
From: jsmonson
Date: Mon, 13 Jan 2025 09:27:11 -0700
Subject: [PATCH 4/7] add V80 similar to other Versal Parts

---
 src/finn/util/basic.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 3f5f3960e4..870f9f6fa6 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -69,7 +69,6 @@
 alveo_part_map["U250"] = "xcu250-figd2104-2L-e"
 alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
 alveo_part_map["U55C"] = "xcu55c-fsvh2892-2L-e"
-alveo_part_map["V80"] = "xcv80-lsva4737-2MHP-e-s"

 alveo_default_platform = dict()
 alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_5_202210_1"
@@ -82,7 +81,7 @@
 part_map = {**pynq_part_map, **alveo_part_map}
 part_map["VEK280"] = "xcve2802-vsvh1760-2MP-e-S"
 part_map["VCK190"] = "xcvc1902-vsva2197-2MP-e-S"
-
+part_map["V80"] = "xcv80-lsva4737-2MHP-e-s"

 def get_rtlsim_trace_depth():
     """Return the trace depth for rtlsim via PyVerilator. Controllable

From ba0261fd2d431568917f1ece7f8569da2daf14ec Mon Sep 17 00:00:00 2001
From: jsmonson
Date: Mon, 13 Jan 2025 09:32:00 -0700
Subject: [PATCH 5/7] add corrected spacing

---
 src/finn/util/basic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 870f9f6fa6..5eb72194ea 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -83,6 +83,7 @@
 part_map["VCK190"] = "xcvc1902-vsva2197-2MP-e-S"
 part_map["V80"] = "xcv80-lsva4737-2MHP-e-s"

+
 def get_rtlsim_trace_depth():
     """Return the trace depth for rtlsim via PyVerilator. Controllable
     via the RTLSIM_TRACE_DEPTH environment variable. If the env.var. is
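
With the entry moved into the merged dictionary, the V80 resolves by board name like any other Versal part. A quick check, assuming a FINN checkout with these patches applied:

    from finn.util.basic import part_map

    # V80 now lives in the merged part_map next to the other Versal boards
    assert part_map["V80"] == "xcv80-lsva4737-2MHP-e-s"
    assert part_map["VCK190"] == "xcvc1902-vsva2197-2MP-e-S"
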
From 65a83b2f7943219acbf0f5bc427da46034cdadab Mon Sep 17 00:00:00 2001
From: auphelia
Date: Tue, 14 Jan 2025 16:32:42 +0000
Subject: [PATCH 6/7] [Builder] Relax requirements to derive fpga part for
 specific board

---
 src/finn/builder/build_dataflow_config.py | 11 +++++------
 tests/fpgadataflow/test_fifosizing.py     |  1 -
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 5d69802337..d6437a2e5c 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -35,7 +35,7 @@
 from typing import Any, List, Optional

 from finn.transformation.fpgadataflow.vitis_build import VitisOptStrategy
-from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map
+from finn.util.basic import alveo_default_platform, part_map


 class AutoFIFOSizingMethod(str, Enum):
@@ -370,11 +370,10 @@ def _resolve_driver_platform(self):
     def _resolve_fpga_part(self):
         if self.fpga_part is None:
             # lookup from part map if not specified
-            if self.shell_flow_type == ShellFlowType.VIVADO_ZYNQ:
-                return pynq_part_map[self.board]
-            elif self.shell_flow_type == ShellFlowType.VITIS_ALVEO:
-                return alveo_part_map[self.board]
-            else:
+            try:
+                fpga_part = part_map[self.board]
+                return fpga_part
+            except KeyError:
                 raise Exception("Couldn't resolve fpga_part for " + self.board)
         else:
             # return as-is when explicitly specified
diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py
index 338204c0c7..e5f9659665 100644
--- a/tests/fpgadataflow/test_fifosizing.py
+++ b/tests/fpgadataflow/test_fifosizing.py
@@ -70,7 +70,6 @@ def test_fifosizing_linear(method, topology):
         synth_clk_period_ns=10.0,
         board="Pynq-Z1",
         rtlsim_batch_size=100 if topology == "tfc" else 2,
-        shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
         generate_outputs=[
             build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
             build_cfg.DataflowOutputType.STITCHED_IP,
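
After this change the part lookup only needs a known board name; shell_flow_type is no longer consulted. A sketch of the relaxed behaviour, assuming a FINN environment with this patch applied (_resolve_fpga_part is the internal helper modified above; the config values are illustrative):

    from finn.builder.build_dataflow_config import DataflowBuildConfig

    # Minimal config without shell_flow_type; the part is derived from the board
    cfg = DataflowBuildConfig(
        output_dir="build",
        synth_clk_period_ns=10.0,
        board="Pynq-Z1",
        generate_outputs=[],
    )
    # Resolves via part_map["Pynq-Z1"] instead of the flow-specific maps
    print(cfg._resolve_fpga_part())
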
From c2905f7cdff4dc4f620a9863042df0b8aeff8d75 Mon Sep 17 00:00:00 2001
From: Christoph Berganski
Date: Tue, 21 Jan 2025 15:12:12 +0100
Subject: [PATCH 7/7] Make activation handler guess the layout based on tensor
 rank if missing

---
 .../qonnx/qonnx_activation_handlers.py        | 38 +++++++++++++++----
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
index 36181e7a48..8085e5a8e0 100644
--- a/src/finn/transformation/qonnx/qonnx_activation_handlers.py
+++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
@@ -402,12 +402,24 @@ def _calculate_thresholds(self):
                     else:
                         thresholds[c][t] = step / selu_scale

-        # First try to consider the tensor layout of the input for determining
-        # the number of output channels
+        # Get the shape of the input (should also be the output) tensor
+        # Note: Querying the input is more safe as we do not want to
+        # propagate shapes backwards by accident.
+        shape = self._model.get_tensor_shape(self._q_node.input[0])  # noqa
+        # First try to consider the tensor layout of the input for
+        # determining the number of output channels
         layout = self._model.get_tensor_layout(self._q_node.input[0])
-        # If there is a layout annotation, use this to determine the index of
-        # the channel dimension
-        if layout is not None and "C" in layout:
+        # If there is no layout annotation, guess based on rank of the
+        # tensor
+        # TODO: No support for Rank >= 5
+        if layout is None and len(shape) < 5:
+            # Maps tensor rank to layout annotation
+            rank_to_layout = {0: None, 1: "C", 2: "NC", 3: "NWC", 4: "NCHW"}
+            # Lookup the layout required by this input shape
+            layout = rank_to_layout[len(shape)]
+        # If there is a layout annotation, use this to determine the index
+        # of the channel dimension
+        if layout is not None and "C" in layout:  # noqa: Duplicate
             # Lookup the index in list
             cdim = layout.index("C")
         # If no layout has been annotated or there is no channel dimension, fall
@@ -570,12 +582,24 @@ def _calculate_thresholds(self):
             for t in range(num_thresholds):
                 thresholds[c][t] = min_threshold[c] + step[c] * t

+        # Get the shape of the input (should also be the output) tensor
+        # Note: Querying the input is more safe as we do not want to
+        # propagate shapes backwards by accident.
+        shape = self._model.get_tensor_shape(self._q_node.input[0])
         # First try to consider the tensor layout of the input for
         # determining the number of output channels
-        layout = self._model.get_tensor_layout(self._q_node.input[0])
+        layout = self._model.get_tensor_layout(self._q_node.input[0])  # noqa
+        # If there is no layout annotation, guess based on rank of the
+        # tensor
+        # TODO: No support for Rank >= 5
+        if layout is None and len(shape) < 5:
+            # Maps tensor rank to layout annotation
+            rank_to_layout = {0: None, 1: "C", 2: "NC", 3: "NWC", 4: "NCHW"}
+            # Lookup the layout required by this input shape
+            layout = rank_to_layout[len(shape)]
         # If there is a layout annotation, use this to determine the index
         # of the channel dimension
-        if layout is not None and "C" in layout:
+        if layout is not None and "C" in layout:  # noqa: Duplicate
             # Lookup the index in list
             cdim = layout.index("C")
         # If no layout has been annotated or there is no channel dimension,
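
The rank-based fallback is easy to exercise on its own. A minimal sketch with a hypothetical guess_layout helper, using the same mapping as the handler (ranks of 5 and higher remain unsupported):

    # Maps tensor rank to the layout annotation assumed by the handler
    rank_to_layout = {0: None, 1: "C", 2: "NC", 3: "NWC", 4: "NCHW"}

    def guess_layout(shape):
        if len(shape) < 5:
            return rank_to_layout[len(shape)]
        raise ValueError("No layout guess for tensors of rank >= 5")

    # A 3D attention-style tensor is assumed channels-last (NWC), while
    # the 4D convolutional default stays NCHW:
    assert guess_layout((1, 128, 64)) == "NWC"
    assert guess_layout((1, 3, 32, 32)) == "NCHW"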