From 3002e6239903af9f9f9444ef3fbbb8935ba2bb92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 20 Sep 2022 11:08:59 +0100
Subject: [PATCH 001/111] HDL for new thresholding by binary search.

---
 finn-rtllib/thresholding/hdl/thresholding.sv  | 153 ++++++++++++++
 .../thresholding/hdl/thresholding_axi.sv      | 198 ++++++++++++++++++
 .../hdl/thresholding_axi_wrapper.v            | 122 +++++++++++
 3 files changed, 473 insertions(+)
 create mode 100644 finn-rtllib/thresholding/hdl/thresholding.sv
 create mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi.sv
 create mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
new file mode 100644
index 0000000000..93ccdc51c5
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -0,0 +1,153 @@
+/******************************************************************************
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the copyright holder nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @brief	Pipelined thresholding by binary search.
+ * @author	Thomas B. Preußer <tpreusse@amd.com>
+ *
+ * @description
+ *  Produces the N-bit count of those among 2^N-1 thresholds that are not
+ *  larger than the corresponding input:
+ *     y = Σ(T_i <= x)
+ *  The result is computed by binary search. The runtime-configurable
+ *  thresholds must be written in ascending order:
+ *     i < j => T_i < T_j
+ *  The design supports channel folding allowing each input to be processed
+ *  with respect to a selectable set of thresholds. The corresponding
+ *  threshold configuration relies on a channel address prefix. Inputs are
+ *  accompanied by a channel selector.
+ *****************************************************************************/
+module thresholding #(
+	int unsigned  N,  // output precision: width of odat and number of pipeline stages
+	int unsigned  M,  // input/threshold precision: width of idat and twd
+	int unsigned  C,  // number of channels
+
+	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C)	// channel selector width, never below 1
+)(
+	// Global Control
+	input	logic  clk,
+	input	logic  rst,	// synchronous: clears the valid flag of every pipeline stage
+
+	// Threshold Configuration
+	input	logic  twe,	// write enable
+	input	logic [$clog2(C)+N-1:0]  twa,	// write address: channel prefix (C > 1) above the N-bit threshold index
+	input	logic [          M-1:0]  twd,	// threshold value to store
+
+	// Clock Enable for Stream Processing (gates the stage registers and memory reads, not the threshold writes)
+	input	logic  en,
+
+	// Input Stream
+	input	logic  ivld,
+	input	logic [C_BITS-1:0]  icnl,	// Ignored for C == 1
+	input	logic [M     -1:0]  idat,
+
+	// Output Stream
+	output	logic  ovld,
+	output	logic [C_BITS-1:0]  ocnl,	// channel selector that travelled with the input
+	output	logic [N     -1:0]  odat	// result bits assembled by the N stages
+);
+
+	// Pipeline Links & Feed
+	typedef struct packed {
+		logic               vld;	// Valid data identification
+		logic [C_BITS-1:0]  cnl;	// Channel
+		logic [M     -1:0]  val;	// Original input value
+		logic [0:N-1]       res;	// Assembling result with valid prefix [0:stage] after stage #stage
+	} pipe_t;
+	uwire pipe_t  pipe[0:N];	// pipe[s] enters stage #s, pipe[N] drives the output ports
+	assign	pipe[0] = pipe_t'{ vld: ivld, cnl: icnl, val: idat, res: {N{1'bx}} };	// Feed original input
+
+	// Stages: 0, 1, ..., N-1 (stage #s stores 2^s thresholds per channel)
+	uwire [0:N-1]  tws = (twa[N-1:0]+1) & ~twa[N-1:0];   // Write Select per stage by address suffix: one-hot at the lowest 0 bit (index MSB -> stage 0), none for an all-ones index
+	for(genvar  stage = 0; stage < N; stage++) begin : genStages
+
+		// Threshold Memory
+		uwire [M-1:0]  thresh;	// threshold compared against State.val in this stage
+		if(1) begin : blkUpdate
+
+			// Write control: local select from global address
+			uwire  we = twe && tws[stage];
+			if((C == 1) && (stage == 0)) begin	// exactly one threshold: plain register, no address needed
+				logic [M-1:0]  Thresh = 'x;
+				always_ff @(posedge clk) begin
+					if(rst)      Thresh <= 'x;	// reset discards the stored value
+					else if(we)  Thresh <= twd;
+				end
+				assign  thresh = Thresh;
+			end
+			else begin
+				logic [M-1:0]  Threshs[C * 2**stage];	// not touched by rst
+				uwire [$clog2(C)+stage-1:0]  wa = twa[$left(twa):N-stage];	// channel bits plus the upper 'stage' bits of the threshold index
+				uwire [$clog2(C)+stage-1:0]  ra;	// read address, laid out like wa
+				if(C > 1)  assign  ra[stage+:C_BITS] = pipe[stage].cnl;	// upper part: channel
+				if(stage)  assign  ra[stage-1:0]     = pipe[stage].res[0:stage-1];	// lower part: result bits decided by the earlier stages
+
+				// Write
+				always_ff @(posedge clk) begin
+					if(we)  Threshs[wa] <= twd;
+				end
+
+				// Read
+				logic [M-1:0]  RdReg;
+				always_ff @(posedge clk) begin
+					if(en)  RdReg <= Threshs[ra];	// registered on the same enable as State, so both belong to the same input
+				end
+				assign	thresh = RdReg;
+			end
+
+		end : blkUpdate
+
+		// Pipeline regs simply copying the input
+		pipe_t  State = '{ vld: 0, cnl: 'x, val: 'x, res: 'x };
+		always_ff @(posedge clk) begin
+			if(rst)      State <= '{ vld: 0, cnl: 'x, val: 'x, res: 'x };
+			else if(en)  State <= pipe[stage];
+		end
+
+		// Assemble pipeline data
+		logic [0:N-1]  res;
+		always_comb begin
+			res        = State.res;
+			res[stage] = thresh <= State.val;	// Patch in next result bit
+		end
+		assign	pipe[stage+1] = '{
+			vld: State.vld,
+			cnl: State.cnl,
+			val: State.val,
+			res: res
+		};
+
+	end : genStages
+
+	// Output
+	assign	ovld = pipe[N].vld;
+	assign	ocnl = pipe[N].cnl;
+	assign	odat = pipe[N].res;
+
+endmodule : thresholding
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
new file mode 100644
index 0000000000..71e54c5ca0
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -0,0 +1,198 @@
+/******************************************************************************
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the copyright holder nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @brief	All-AXI interface adapter for thresholding module.
+ * @author	Thomas B. Preußer <tpreusse@amd.com>
+ *****************************************************************************/
+
+module thresholding_axi #(
+	int unsigned  N,	// output precision
+	int unsigned  M,	// input/threshold precision
+	int unsigned  C		// Channels
+)(
+	//- Global Control ------------------
+	input	logic  ap_clk,
+	input	logic  ap_rst_n,	// active low, inverted into the internal rst
+
+	//- AXI Lite ------------------------
+	// Writing
+	input	logic                    s_axilite_AWVALID,
+	output	logic                    s_axilite_AWREADY,
+	input	logic [$clog2(C)+N-1:0]  s_axilite_AWADDR,
+
+	input	logic         s_axilite_WVALID,
+	output	logic         s_axilite_WREADY,
+	input	logic [31:0]  s_axilite_WDATA,	// only bits [M-1:0] are stored
+	input	logic [ 3:0]  s_axilite_WSTRB,	// not evaluated in this module
+
+	output	logic        s_axilite_BVALID,
+	input	logic        s_axilite_BREADY,
+	output	logic [1:0]  s_axilite_BRESP,
+
+	// Reading
+	input	logic        s_axilite_ARVALID,
+	output	logic        s_axilite_ARREADY,
+	input	logic [0:0]  s_axilite_ARADDR,	// not evaluated in this module
+
+	output	logic         s_axilite_RVALID,
+	input	logic         s_axilite_RREADY,
+	output	logic [31:0]  s_axilite_RDATA,
+	output	logic [ 1:0]  s_axilite_RRESP,
+
+	//- AXI Stream - Input --------------
+	output	logic  s_axis_tready,
+	input	logic  s_axis_tvalid,
+	input	logic [((M+7)/8)*8-1:0]  s_axis_tdata,	// M rounded up to whole bytes; bits [M-1:0] are used
+
+	//- AXI Stream - Output -------------
+	input	logic  m_axis_tready,
+	output	logic  m_axis_tvalid,
+	output	logic [((N+7)/8)*8-1:0]  m_axis_tdata	// N rounded up to whole bytes
+);
+	//- Global Control ------------------------------------------------------
+	uwire  clk = ap_clk;
+	uwire  rst = !ap_rst_n;
+
+	//- AXI Lite: Threshold Configuration -----------------------------------
+	uwire  twe;
+	uwire [$clog2(C)+N-1:0]  twa;
+	uwire [          M-1:0]  twd;
+	if(1) begin : blkAxiLite
+		logic  WABusy = 0;	// a write address has been captured
+		logic  WDBusy = 0;	// write data has been captured
+		logic [$clog2(C)+N-1:0]  Addr = 'x;
+		logic [          M-1:0]  Data = 'x;
+
+		assign	twe = WABusy && WDBusy;	// write is active from the arrival of both parts until the response is accepted
+		assign	twa = Addr;
+		assign	twd = Data;
+
+		uwire  clr_wr = rst || (twe && s_axilite_BREADY);	// reset or write response handshake
+		always_ff @(posedge clk) begin : blockName
+			if(clr_wr) begin
+				WABusy <= 0;
+				Addr <= 'x;
+				WDBusy <= 0;
+				Data <= 'x;
+			end
+			else begin
+				if(!WABusy) begin	// address channel idle: capture whatever is offered
+					WABusy <= s_axilite_AWVALID;
+					Addr   <= s_axilite_AWADDR[$clog2(C)+N-1:0];	// NOTE(review): used as threshold index as is, no byte-to-word shift - confirm host addressing
+				end
+				if(!WDBusy) begin	// data channel idle: capture whatever is offered
+					WDBusy <= s_axilite_WVALID;
+					Data   <= s_axilite_WDATA[M-1:0];
+				end
+			end
+		end
+		assign	s_axilite_AWREADY = !WABusy;
+		assign	s_axilite_WREADY  = !WDBusy;
+		assign	s_axilite_BVALID  = WABusy && WDBusy;	// same condition as twe
+		assign	s_axilite_BRESP   = '0; // OK
+
+		// Answer all reads with '1
+		logic  RValid =  0;	// a read response is pending
+		uwire  clr_rd = rst || (RValid && s_axilite_RREADY);	// reset or read data handshake
+		always_ff @(posedge clk) begin
+			if(clr_rd)        RValid <=  0;
+			else if(!RValid)  RValid <= s_axilite_ARVALID;
+		end
+		assign	s_axilite_ARREADY = !RValid;
+		assign	s_axilite_RVALID  = RValid;
+		assign	s_axilite_RDATA   = '1;
+		assign	s_axilite_RRESP   = '0; // OK
+
+	end : blkAxiLite
+
+	//- IO-Sandwich with two-stage output buffer for containing a local enable
+	uwire  en;	// clock enable of the core, also the input stream ready
+	uwire [N-1:0]  odat;
+	uwire  ovld;
+	if(1) begin : blkOutputDecouple
+		typedef struct {
+			logic          vld;
+			logic [N-1:0]  dat;
+		} buf_t;
+		buf_t  Buf[2] = '{ default: '{ vld: 0, dat: 'x } };	// Buf[1] drives m_axis, Buf[0] is the spill slot behind it
+		always_ff @(posedge clk) begin
+			if(rst)  Buf <= '{ default: '{ vld: 0, dat: 'x } };
+			else begin
+				if(!Buf[1].vld || m_axis_tready) begin	// output slot is empty or being consumed
+					Buf[1] <= '{
+						vld: Buf[0].vld || ovld,
+						dat: Buf[0].vld? Buf[0].dat : odat	// a spilled result takes precedence over the core output
+					};
+				end
+				Buf[0].vld <= Buf[1].vld && !m_axis_tready && (Buf[0].vld || ovld);	// spill slot is occupied only while the output slot stays blocked
+				if(!Buf[0].vld)  Buf[0].dat <= odat;	// tracks the core output until the slot becomes valid
+			end
+		end
+		assign	en = !Buf[0].vld;	// core is stalled while the spill slot is occupied
+
+		assign	m_axis_tvalid = Buf[1].vld;
+		assign	m_axis_tdata  = Buf[1].dat;
+
+	end : blkOutputDecouple
+
+	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C);	// same derivation as in the thresholding core
+	uwire  ivld = s_axis_tvalid;
+	uwire [C_BITS-1:0]  icnl;
+	uwire [M     -1:0]  idat = s_axis_tdata[M-1:0];
+	assign	s_axis_tready = en;
+	if(C == 1)  assign  icnl = 'x;	// the core documents icnl as ignored for C == 1
+	else begin
+		logic [C_BITS-1:0]  Chnl = 0;	// channel of the next accepted input: 0, 1, ..., C-1, 0, ...
+		logic               Last = 0;	// set while Chnl holds the final channel C-1
+		uwire  inc = ivld && en;	// input stream handshake
+		uwire  clr = rst || (Last && inc);	// wrap around after the final channel
+		always_ff @(posedge clk) begin
+			if(clr) begin
+				Chnl <= 0;
+				Last <= 0;
+			end
+			else if(inc) begin
+				Chnl <= Chnl + 1;
+				Last <= (~Chnl & (C-2)) == 0;	// true once Chnl covers all set bits of C-2, first reached at Chnl == C-2
+			end
+		end
+		assign	icnl = Chnl;
+	end
+
+	// Core Thresholding Module
+	thresholding #(.N(N), .M(M), .C(C)) core (
+		.clk, .rst,
+		.twe, .twa, .twd,
+		.en,
+		.ivld, .icnl, .idat,
+		.ovld, .ocnl(), .odat	// ocnl is left unconnected
+	);
+
+endmodule : thresholding_axi
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
new file mode 100644
index 0000000000..bb6b17b32f
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -0,0 +1,122 @@
+/******************************************************************************
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the copyright holder nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @brief	IPI-compatible Verilog wrapper for thresholding_axi module.
+ * @author	Thomas B. Preußer <tpreusse@amd.com>
+ *****************************************************************************/
+
+module thresholding_axi_wrapper #(
+	parameter  N,	// bit width of the thresholding result
+	parameter  M,	// bit width of input values and thresholds
+	parameter  C,	// channel count
+	parameter  C_BITS // channel address bits; has to match the $clog2(C) that thresholding_axi derives itself
+)(
+	//- Clock & Reset (active low) ------
+	input	ap_clk,
+	input	ap_rst_n,
+
+	//- AXI Lite ------------------------
+	// Write address, write data and write response channels
+	input	                s_axilite_AWVALID,
+	output	                s_axilite_AWREADY,
+	input	[C_BITS+N-1:0]  s_axilite_AWADDR,
+
+	input	        s_axilite_WVALID,
+	output	        s_axilite_WREADY,
+	input	[31:0]  s_axilite_WDATA,
+	input	[ 3:0]  s_axilite_WSTRB,
+
+	output	       s_axilite_BVALID,
+	input	       s_axilite_BREADY,
+	output	[1:0]  s_axilite_BRESP,
+
+	// Read address and read data channels
+	input	       s_axilite_ARVALID,
+	output	       s_axilite_ARREADY,
+	input	[0:0]  s_axilite_ARADDR,
+
+	output	        s_axilite_RVALID,
+	input	        s_axilite_RREADY,
+	output	[31:0]  s_axilite_RDATA,
+	output	[ 1:0]  s_axilite_RRESP,
+
+	//- AXI Stream - Input (M padded to whole bytes)
+	output	s_axis_tready,
+	input	s_axis_tvalid,
+	input	[((M+7)/8)*8-1:0]  s_axis_tdata,
+
+	//- AXI Stream - Output (N padded to whole bytes)
+	input	m_axis_tready,
+	output	m_axis_tvalid,
+	output	[((N+7)/8)*8-1:0]  m_axis_tdata
+);
+
+	// Pure pass-through: every wrapper port is tied to the equally named port
+	// of the thresholding_axi instance. C_BITS only sizes s_axilite_AWADDR here.
+	thresholding_axi #(
+		.N(N),
+		.M(M),
+		.C(C)
+	) inst (
+		// Clock and active-low reset
+		.ap_clk  (ap_clk),
+		.ap_rst_n(ap_rst_n),
+
+		// AXI-Lite write address
+		.s_axilite_AWADDR (s_axilite_AWADDR),
+		.s_axilite_AWVALID(s_axilite_AWVALID),
+		.s_axilite_AWREADY(s_axilite_AWREADY),
+		// AXI-Lite write data
+		.s_axilite_WDATA  (s_axilite_WDATA),
+		.s_axilite_WSTRB  (s_axilite_WSTRB),
+		.s_axilite_WVALID (s_axilite_WVALID),
+		.s_axilite_WREADY (s_axilite_WREADY),
+		// AXI-Lite write response
+		.s_axilite_BRESP  (s_axilite_BRESP),
+		.s_axilite_BVALID (s_axilite_BVALID),
+		.s_axilite_BREADY (s_axilite_BREADY),
+		// AXI-Lite read address
+		.s_axilite_ARADDR (s_axilite_ARADDR),
+		.s_axilite_ARVALID(s_axilite_ARVALID),
+		.s_axilite_ARREADY(s_axilite_ARREADY),
+		// AXI-Lite read data
+		.s_axilite_RDATA  (s_axilite_RDATA),
+		.s_axilite_RRESP  (s_axilite_RRESP),
+		.s_axilite_RVALID (s_axilite_RVALID),
+		.s_axilite_RREADY (s_axilite_RREADY),
+
+		// AXI-Stream input
+		.s_axis_tdata(s_axis_tdata),  .s_axis_tvalid(s_axis_tvalid),  .s_axis_tready(s_axis_tready),
+
+		// AXI-Stream output
+		.m_axis_tdata(m_axis_tdata),  .m_axis_tvalid(m_axis_tvalid),  .m_axis_tready(m_axis_tready)
+	);
+
+endmodule : thresholding_axi_wrapper

From 3c92c2fc460fb5e45fdb0dfcc0b92c572ae65ce7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 20 Sep 2022 13:33:01 +0100
Subject: [PATCH 002/111] IP core support files for thresholding module.

---
 finn-rtllib/thresholding/component.xml        | 817 ++++++++++++++++++
 .../xgui/thresholding_axi_wrapper_v1_0.tcl    |  74 ++
 2 files changed, 891 insertions(+)
 create mode 100644 finn-rtllib/thresholding/component.xml
 create mode 100644 finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl

diff --git a/finn-rtllib/thresholding/component.xml b/finn-rtllib/thresholding/component.xml
new file mode 100644
index 0000000000..0a56f93316
--- /dev/null
+++ b/finn-rtllib/thresholding/component.xml
@@ -0,0 +1,817 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<spirit:component xmlns:xilinx="http://www.xilinx.com" xmlns:spirit="http://www.spiritconsortium.org/XMLSchema/SPIRIT/1685-2009" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <spirit:vendor>amd.com</spirit:vendor>
+  <spirit:library>user</spirit:library>
+  <spirit:name>thresholding_axi_wrapper</spirit:name>
+  <spirit:version>1.0</spirit:version>
+  <spirit:busInterfaces>
+    <spirit:busInterface>
+      <spirit:name>m_axis</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis_rtl" spirit:version="1.0"/>
+      <spirit:master/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>m_axis_tdata</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>m_axis_tvalid</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>m_axis_tready</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>s_axis</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis_rtl" spirit:version="1.0"/>
+      <spirit:slave/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axis_tdata</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axis_tvalid</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axis_tready</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>s_axilite</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm_rtl" spirit:version="1.0"/>
+      <spirit:slave>
+        <spirit:memoryMapRef spirit:memoryMapRef="s_axilite"/>
+      </spirit:slave>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>AWADDR</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_AWADDR</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>AWVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_AWVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>AWREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_AWREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WDATA</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WSTRB</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WSTRB</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>BRESP</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_BRESP</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>BVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_BVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>BREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_BREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>ARADDR</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_ARADDR</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>ARVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_ARVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>ARREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_ARREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RDATA</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RRESP</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RRESP</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>ap_rst_n</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset_rtl" spirit:version="1.0"/>
+      <spirit:slave/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RST</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>ap_rst_n</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+      <spirit:parameters>
+        <spirit:parameter>
+          <spirit:name>POLARITY</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_RST_N.POLARITY" spirit:choiceRef="choice_list_74b5137e">ACTIVE_LOW</spirit:value>
+        </spirit:parameter>
+      </spirit:parameters>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>ap_clk</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock_rtl" spirit:version="1.0"/>
+      <spirit:slave/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>CLK</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>ap_clk</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+      <spirit:parameters>
+        <spirit:parameter>
+          <spirit:name>ASSOCIATED_RESET</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_RESET">ap_rst_n</spirit:value>
+        </spirit:parameter>
+        <spirit:parameter>
+          <spirit:name>ASSOCIATED_BUSIF</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_BUSIF">m_axis:s_axis:s_axilite</spirit:value>
+        </spirit:parameter>
+      </spirit:parameters>
+    </spirit:busInterface>
+  </spirit:busInterfaces>
+  <spirit:memoryMaps>
+    <spirit:memoryMap>
+      <spirit:name>s_axilite</spirit:name>
+      <spirit:displayName>s_axilite</spirit:displayName>
+      <spirit:addressBlock>
+        <spirit:name>reg0</spirit:name>
+        <spirit:displayName>reg0</spirit:displayName>
+        <spirit:baseAddress spirit:format="bitString" spirit:bitStringLength="1">0x0</spirit:baseAddress>
+        <spirit:range spirit:format="long" spirit:resolve="dependent" spirit:dependency="pow(2,((spirit:decode(id(&apos;MODELPARAM_VALUE.C_BITS&apos;)) + spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;))) - 1) - 0 + 1)" spirit:minimum="4096" spirit:rangeType="long">4096</spirit:range>
+        <spirit:width spirit:format="long">32</spirit:width>
+        <spirit:usage>register</spirit:usage>
+      </spirit:addressBlock>
+    </spirit:memoryMap>
+  </spirit:memoryMaps>
+  <spirit:model>
+    <spirit:views>
+      <spirit:view>
+        <spirit:name>xilinx_anylanguagesynthesis</spirit:name>
+        <spirit:displayName>Synthesis</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:synthesis</spirit:envIdentifier>
+        <spirit:language>Verilog</spirit:language>
+        <spirit:modelName>thresholding_axi_wrapper</spirit:modelName>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_anylanguagesynthesis_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>5cc8f7a9</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+      <spirit:view>
+        <spirit:name>xilinx_xpgui</spirit:name>
+        <spirit:displayName>UI Layout</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:xgui.ui</spirit:envIdentifier>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_xpgui_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>c456596c</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+    </spirit:views>
+    <spirit:ports>
+      <spirit:port>
+        <spirit:name>ap_clk</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>ap_rst_n</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_AWVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_AWREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_AWADDR</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="((spirit:decode(id(&apos;MODELPARAM_VALUE.C_BITS&apos;)) + spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;))) - 1)">3</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WDATA</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">31</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WSTRB</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">3</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">1</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_BVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_BREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_BRESP</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">1</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_ARVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_ARREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_ARADDR</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">0</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RDATA</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">31</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RRESP</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">1</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axis_tready</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axis_tvalid</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axis_tdata</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="((((spirit:decode(id(&apos;MODELPARAM_VALUE.M&apos;)) + 7) / 8) * 8) - 1)">15</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>m_axis_tready</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">1</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>m_axis_tvalid</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>m_axis_tdata</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="((((spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;)) + 7) / 8) * 8) - 1)">7</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+    </spirit:ports>
+    <spirit:modelParameters>
+      <spirit:modelParameter xsi:type="spirit:nameValueTypeType">
+        <spirit:name>N</spirit:name>
+        <spirit:displayName>N</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.N">4</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter>
+        <spirit:name>M</spirit:name>
+        <spirit:displayName>M</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.M">16</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter>
+        <spirit:name>C</spirit:name>
+        <spirit:displayName>C</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.C">1</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter>
+        <spirit:name>C_BITS</spirit:name>
+        <spirit:displayName>C Bits</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.C_BITS">0</spirit:value>
+      </spirit:modelParameter>
+    </spirit:modelParameters>
+  </spirit:model>
+  <spirit:choices>
+    <spirit:choice>
+      <spirit:name>choice_list_74b5137e</spirit:name>
+      <spirit:enumeration>ACTIVE_HIGH</spirit:enumeration>
+      <spirit:enumeration>ACTIVE_LOW</spirit:enumeration>
+    </spirit:choice>
+  </spirit:choices>
+  <spirit:fileSets>
+    <spirit:fileSet>
+      <spirit:name>xilinx_anylanguagesynthesis_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>hdl/thresholding.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi_wrapper.v</spirit:name>
+        <spirit:fileType>verilogSource</spirit:fileType>
+        <spirit:userFileType>CHECKSUM_2ec027ae</spirit:userFileType>
+      </spirit:file>
+    </spirit:fileSet>
+    <spirit:fileSet>
+      <spirit:name>xilinx_xpgui_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>xgui/thresholding_axi_wrapper_v1_0.tcl</spirit:name>
+        <spirit:fileType>tclSource</spirit:fileType>
+        <spirit:userFileType>CHECKSUM_c456596c</spirit:userFileType>
+        <spirit:userFileType>XGUI_VERSION_2</spirit:userFileType>
+      </spirit:file>
+    </spirit:fileSet>
+  </spirit:fileSets>
+  <spirit:description>thresholding_axi_wrapper_v1_0</spirit:description>
+  <spirit:parameters>
+    <spirit:parameter>
+      <spirit:name>N</spirit:name>
+      <spirit:displayName>N</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.N">4</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>M</spirit:name>
+      <spirit:displayName>M</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.M">16</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>C</spirit:name>
+      <spirit:displayName>C</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.C">1</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>C_BITS</spirit:name>
+      <spirit:displayName>C_BITS</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.C_BITS">0</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>Component_Name</spirit:name>
+      <spirit:value spirit:resolve="user" spirit:id="PARAM_VALUE.Component_Name" spirit:order="1">thresholding_axi_wrapper_v1_0</spirit:value>
+    </spirit:parameter>
+  </spirit:parameters>
+  <spirit:vendorExtensions>
+    <xilinx:coreExtensions>
+      <xilinx:supportedFamilies>
+        <xilinx:family xilinx:lifeCycle="Production">virtex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qvirtex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">versal</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintex7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qkintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qkintex7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">akintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">artix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">artix7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">aartix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qartix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">zynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qzynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">azynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">spartan7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">aspartan7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexu</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">zynquplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplusHBM</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplus58g</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintexuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">artixuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintexu</xilinx:family>
+      </xilinx:supportedFamilies>
+      <xilinx:taxonomies>
+        <xilinx:taxonomy>/UserIP</xilinx:taxonomy>
+      </xilinx:taxonomies>
+      <xilinx:displayName>thresholding_axi_wrapper_v1_0</xilinx:displayName>
+      <xilinx:definitionSource>package_project</xilinx:definitionSource>
+      <xilinx:vendorDisplayName>AMD</xilinx:vendorDisplayName>
+      <xilinx:coreRevision>2</xilinx:coreRevision>
+      <xilinx:coreCreationDateTime>2022-09-20T12:31:16Z</xilinx:coreCreationDateTime>
+    </xilinx:coreExtensions>
+    <xilinx:packagingInfo>
+      <xilinx:xilinxVersion>2022.1</xilinx:xilinxVersion>
+      <xilinx:checksum xilinx:scope="busInterfaces" xilinx:value="e262c422"/>
+      <xilinx:checksum xilinx:scope="memoryMaps" xilinx:value="d6ddd21a"/>
+      <xilinx:checksum xilinx:scope="fileGroups" xilinx:value="3302678a"/>
+      <xilinx:checksum xilinx:scope="ports" xilinx:value="c5010d89"/>
+      <xilinx:checksum xilinx:scope="hdlParameters" xilinx:value="db16a7df"/>
+      <xilinx:checksum xilinx:scope="parameters" xilinx:value="17d48459"/>
+    </xilinx:packagingInfo>
+  </spirit:vendorExtensions>
+</spirit:component>
diff --git a/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl b/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl
new file mode 100644
index 0000000000..02c373e8f2
--- /dev/null
+++ b/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl
@@ -0,0 +1,74 @@
+# Definitional proc to organize widgets for parameters.
+proc init_gui { IPINST } {
+  ipgui::add_param $IPINST -name "Component_Name"
+  #Adding Page
+  set Page_0 [ipgui::add_page $IPINST -name "Page 0"]
+  set C [ipgui::add_param $IPINST -name "C" -parent ${Page_0}]
+  set_property tooltip {Channel Count} ${C}
+  set C_BITS [ipgui::add_param $IPINST -name "C_BITS" -parent ${Page_0}]
+  set_property tooltip {Must be clog2(C)} ${C_BITS}
+  set M [ipgui::add_param $IPINST -name "M" -parent ${Page_0}]
+  set_property tooltip {Input Precision} ${M}
+  set N [ipgui::add_param $IPINST -name "N" -parent ${Page_0}]
+  set_property tooltip {Output Precision} ${N}
+
+
+}
+
+proc update_PARAM_VALUE.C { PARAM_VALUE.C } {
+	# Procedure called to update C when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.C { PARAM_VALUE.C } {
+	# Procedure called to validate C
+	return true
+}
+
+proc update_PARAM_VALUE.C_BITS { PARAM_VALUE.C_BITS } {
+	# Procedure called to update C_BITS when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.C_BITS { PARAM_VALUE.C_BITS } {
+	# Procedure called to validate C_BITS
+	return true
+}
+
+proc update_PARAM_VALUE.M { PARAM_VALUE.M } {
+	# Procedure called to update M when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.M { PARAM_VALUE.M } {
+	# Procedure called to validate M
+	return true
+}
+
+proc update_PARAM_VALUE.N { PARAM_VALUE.N } {
+	# Procedure called to update N when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.N { PARAM_VALUE.N } {
+	# Procedure called to validate N
+	return true
+}
+
+
+proc update_MODELPARAM_VALUE.N { MODELPARAM_VALUE.N PARAM_VALUE.N } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.N}] ${MODELPARAM_VALUE.N}
+}
+
+proc update_MODELPARAM_VALUE.M { MODELPARAM_VALUE.M PARAM_VALUE.M } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.M}] ${MODELPARAM_VALUE.M}
+}
+
+proc update_MODELPARAM_VALUE.C { MODELPARAM_VALUE.C PARAM_VALUE.C } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.C}] ${MODELPARAM_VALUE.C}
+}
+
+proc update_MODELPARAM_VALUE.C_BITS { MODELPARAM_VALUE.C_BITS PARAM_VALUE.C_BITS } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.C_BITS}] ${MODELPARAM_VALUE.C_BITS}
+}
+

From 09c6da9fc27c3897d3a9cb7423a3e21978f17c2c Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 21 Sep 2022 15:36:12 +0100
Subject: [PATCH 003/111] [thresholding] FINN-44: Add skeleton class for
 Threshold (the RTL version, no HLS support for this class required).

The following functions have been removed when compared to the original Thresholding_Batch class:
    - get_weightstream_width_padded()
        needed for cppsim
    - get_ap_int_max_w()
        needed for cppsim
    - get_template_param_values()
        needed for cppsim
    - get_hls_compatible_threshold_tensor()
        needed for cppsim/hlslib
    - get_verilog_top_module_intf_names()
        already have TOP verilog module interface names I think
    - get_op_and_param_counts()
        not used anywhere
    - ipgen_extra_directives()
        needed for cppsim/hlslib

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 159 ++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100755 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
new file mode 100755
index 0000000000..0e1916706b
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -0,0 +1,159 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+
+"""@package thresholding_binary_search
+- ONNX i/o tensor shape assumptions for Thresholding:
+- input 0 is the input tensor, shape (..., NumChannels)
+- input 1 is the threshold tensor, shape (NumChannels, n_thres)
+- output 0 is the output tensor, shape (..., NumChannels) - same as input
+- the '...' here can be any shape (representing groups of vectors)
+
+This module creates an RTL IP, HLS is not supported. See 'thresholding_batch'
+for a HLS equivalent.
+"""
+
+
+class Thresholding_Bin_Search(HLSCustomOp):
+    """Class that corresponds to finn-rtllib 'thresholding' function."""
+
+    def __init__(self, onnx_node):
+        super().__init__(onnx_node)
+
+    def get_nodeattr_types(self):
+        return {}
+
+    def calc_tmem(self):
+        return 0
+
+    def make_shape_compatible_op(self, model):
+        return []
+
+    def infer_node_datatype(self, model):
+        return
+
+    def verify_node(self):
+        return []
+
+    def bram_estimation(self):
+        return 0
+
+    def lut_estimation(self):
+        return 0
+
+    def get_input_datatype(self):
+        return None
+
+    def get_output_datatype(self):
+        return None
+
+    def get_weight_datatype(self):
+        return None
+
+    def minimize_accumulator_width(self, model):
+        return None
+
+    def get_instream_width(self):
+        return 0
+
+    def get_outstream_width(self):
+        return 0
+
+    def get_weightstream_width(self):
+        return 0
+
+    def get_folded_input_shape(self):
+        return tuple([] + [])
+
+    def get_folded_output_shape(self):
+        return tuple([] + [])
+
+    def get_normal_input_shape(self):
+        return tuple([] + [])
+
+    def get_normal_output_shape(self):
+        return tuple([] + [])
+
+    def get_number_output_values(self):
+        return 0
+
+    def get_exp_cycles(self):
+        return 0
+
+    def get_template_param_values(self):
+        return dict()
+
+    def make_weight_file(self, weights, weight_file_mode, weight_file_name):
+        """Produce a file containing given weights (thresholds) in appropriate
+        format for this layer. This file can be used for either synthesis or
+        run-time reconfig of weights.
+
+        Arguments:
+        * weights : numpy array with weights to be put into the file
+        * weight_file_mode : one of {hls_header, decoupled_verilog_dat,
+          decoupled_runtime}
+        * weight_file_name : filename for the weight file to be generated
+        """
+        return
+
+    def generate_params(self, model, path):
+        return
+
+    def execute_node(self, context, graph):
+        return
+
+    def code_generation_ipi(self):
+        return []
+
+    def global_includes(self):
+        pass
+
+    def defines(self, var):
+        pass
+
+    def read_npy_data(self):
+        pass
+
+    def strm_decl(self):
+        pass
+
+    def docompute(self):
+        pass
+
+    def dataoutstrm(self):
+        pass
+
+    def save_as_npy(self):
+        pass
+
+    def blackboxfunction(self):
+        pass
+
+    def pragmas(self):
+        pass

From 1dde2479f65de6cd8bce0be7091189c5b2d313c1 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 21 Sep 2022 15:52:58 +0100
Subject: [PATCH 004/111] [thresholding] FINN-44: Update custom_op's __init__
 to pick up new Thresholding_Bin_Search class

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/__init__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index e5eb483a00..65fbd6e20c 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -58,6 +58,9 @@
 from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO
 from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch
 from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch
+from finn.custom_op.fpgadataflow.thresholding_binary_search import (
+    Thresholding_Bin_Search,
+)
 from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker
 from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour_Batch
 from finn.custom_op.fpgadataflow.vectorvectoractivation import VectorVectorActivation
@@ -79,6 +82,7 @@
 custom_op["Pool_Batch"] = Pool_Batch
 custom_op["FMPadding_Batch"] = FMPadding_Batch
 custom_op["Thresholding_Batch"] = Thresholding_Batch
+custom_op["Thresholding_Binary_search"] = Thresholding_Bin_Search
 custom_op["AddStreams_Batch"] = AddStreams_Batch
 custom_op["LabelSelect_Batch"] = LabelSelect_Batch
 custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch

From 95082d3ce1f518494910b5444da05722fa8db09c Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 21 Sep 2022 19:01:19 +0100
Subject: [PATCH 005/111] [thresholding] FINN-44: Add initial node attributes
 for Thresholding binary search class

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 40 ++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 0e1916706b..97d8e0b281 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -47,7 +47,45 @@ def __init__(self, onnx_node):
         super().__init__(onnx_node)
 
     def get_nodeattr_types(self):
-        return {}
+        my_attrs = {
+            # parallelization; channels thresholded per cycle
+            "PE": ("i", True, 0),
+            # number of channels (each may have different thresholds)
+            "NumChannels": ("i", True, 0),
+            # number of steps in thresholding function. Used only in decoupled mode
+            "numSteps": ("i", True, 1),
+            # string defining memory type
+            "ram_style": ("s", False, "distributed", {"distributed", "block"}),
+            # FINN DataTypes for inputs, outputs
+            "inputDataType": ("s", True, ""),
+            "weightDataType": ("s", True, ""),
+            "outputDataType": ("s", True, ""),
+            # input and output FIFO depths
+            "inFIFODepth": ("i", False, 0),
+            "outFIFODepth": ("i", False, 0),
+            # number of input vectors, examples:
+            # [1] is a single vector (like a FC layer with batch=1)
+            # [4] is four vectors (like a FC layer with batch=4)
+            # [1, 4, 4] is four * four vectors (like a conv layer with batch=1)
+            "numInputVectors": ("ints", False, [1]),
+            # memory mode for the thresholds
+            # const -- embedded thresholds, default
+            # decoupled -- streaming thresholds with streamer packaged inside IP
+            "mem_mode": ("s", False, "const", {"const", "decoupled"}),
+            # (mem_mode = decoupled only) whether weights (thresholds) will be
+            # writable through an AXI-lite interface during runtime
+            # 1 for enabled, 0 for disabled.
+            # see finn-rtllib/memstream/doc/README for more about the memory
+            # address map used for writable weights
+            # IMPORTANT: After using AXI lite to either read or write the weights,
+            # always "flush" the accelerator by first passing a dummy input
+            # vector through the accelerator. This will get rid of any old
+            # weight data from the weight FIFOs.
+            "runtime_writeable_weights": ("i", False, 0, {0, 1}),
+            "gen_top_module": ("s", False, ""),
+        }
+        my_attrs.update(super().get_nodeattr_types())
+        return my_attrs
 
     def calc_tmem(self):
         return 0

From 72832be6caeefdb895a911988ba5ee77d7d2813f Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 21 Sep 2022 19:02:30 +0100
Subject: [PATCH 006/111] [thresholding] FINN-44: Add calc_tmem() method

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 97d8e0b281..6195a26afb 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -88,7 +88,9 @@ def get_nodeattr_types(self):
         return my_attrs
 
     def calc_tmem(self):
-        return 0
+        num_channels = self.get_nodeattr("NumChannels")
+        pe = self.get_nodeattr("PE")
+        return num_channels // pe
 
     def make_shape_compatible_op(self, model):
         return []

From 0d4e3bea27fce23864729663411a80c6734ed402 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 21 Sep 2022 19:06:07 +0100
Subject: [PATCH 007/111] [thresholding] FINN-44: Add methods for retrieving
 input/output/weight data types

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../custom_op/fpgadataflow/thresholding_binary_search.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 6195a26afb..50a3ce5b6b 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -26,6 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from qonnx.core.datatype import DataType
+
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 
 """@package thresholding_binary_search
@@ -108,13 +110,14 @@ def lut_estimation(self):
         return 0
 
     def get_input_datatype(self):
-        return None
+        return DataType[self.get_nodeattr("inputDataType")]
 
     def get_output_datatype(self):
-        return None
+        return DataType[self.get_nodeattr("outputDataType")]
 
     def get_weight_datatype(self):
-        return None
+        """The term 'weights' and 'thresholds' are used interchangably in this class."""
+        return DataType[self.get_nodeattr("weightDataType")]
 
     def minimize_accumulator_width(self, model):
         return None

From 28568c6777d64adaa9d16f9bc58c3eda96fd7dbc Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 21 Sep 2022 19:09:24 +0100
Subject: [PATCH 008/111] [thresholding] FINN-44: Add methods for retrieving
 node input/output shapes

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../fpgadataflow/thresholding_binary_search.py  | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 50a3ce5b6b..ee74f28485 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -132,16 +132,25 @@ def get_weightstream_width(self):
         return 0
 
     def get_folded_input_shape(self):
-        return tuple([] + [])
+        fold = self.calc_tmem()
+        pe = self.get_nodeattr("PE")
+        vecs = list(self.get_nodeattr("numInputVectors"))
+        folded_input_shape = tuple(vecs + [fold, pe])
+        return folded_input_shape
 
     def get_folded_output_shape(self):
-        return tuple([] + [])
+        # same shape as input
+        return self.get_folded_input_shape()
 
     def get_normal_input_shape(self):
-        return tuple([] + [])
+        num_channels = self.get_nodeattr("NumChannels")
+        vecs = list(self.get_nodeattr("numInputVectors"))
+        normal_input_shape = tuple(vecs + [num_channels])
+        return normal_input_shape
 
     def get_normal_output_shape(self):
-        return tuple([] + [])
+        # same shape as input
+        return self.get_normal_input_shape()
 
     def get_number_output_values(self):
         return 0

From 280870d25864781b2ce3683a10824049d19f9bff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Mon, 24 Oct 2022 14:58:32 +0100
Subject: [PATCH 009/111] Thresholding over signed inputs.

---
 finn-rtllib/thresholding/hdl/thresholding.sv | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 93ccdc51c5..9deeac458c 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -64,8 +64,8 @@ module thresholding #(
 
 	// Input Stream
 	input	logic  ivld,
-	input	logic [C_BITS-1:0]  icnl,	// Ignored for C == 1
-	input	logic [M     -1:0]  idat,
+	input	logic        [C_BITS-1:0]  icnl,	// Ignored for C == 1
+	input	logic signed [M     -1:0]  idat,
 
 	// Output Stream
 	output	logic  ovld,
@@ -75,10 +75,10 @@ module thresholding #(
 
 	// Pipeline Links & Feed
 	typedef struct packed {
-		logic               vld;	// Valid data identification
-		logic [C_BITS-1:0]  cnl;	// Channel
-		logic [M     -1:0]  val;	// Original input value
-		logic [0:N-1]       res;	// Assembling result with valid prefix [0:stage] after stage #stage
+		logic                      vld;	// Valid data identification
+		logic        [C_BITS-1:0]  cnl;	// Channel
+		logic signed [M     -1:0]  val;	// Original input value
+		logic        [0:N-1]       res;	// Assembling result with valid prefix [0:stage] after stage #stage
 	} pipe_t;
 	uwire pipe_t  pipe[0:N];
 	assign	pipe[0] = pipe_t'{ vld: ivld, cnl: icnl, val: idat, res: {N{1'bx}} };	// Feed original input
@@ -88,13 +88,13 @@ module thresholding #(
 	for(genvar  stage = 0; stage < N; stage++) begin : genStages
 
 		// Threshold Memory
-		uwire [M-1:0]  thresh;
+		uwire signed [M-1:0]  thresh;
 		if(1) begin : blkUpdate
 
 			// Write control: local select from global address
 			uwire  we = twe && tws[stage];
 			if((C == 1) && (stage == 0)) begin
-				logic [M-1:0]  Thresh = 'x;
+				logic signed [M-1:0]  Thresh = 'x;
 				always_ff @(posedge clk) begin
 					if(rst)      Thresh <= 'x;
 					else if(we)  Thresh <= twd;
@@ -102,7 +102,7 @@ module thresholding #(
 				assign  thresh = Thresh;
 			end
 			else begin
-				logic [M-1:0]  Threshs[C * 2**stage];
+				logic signed [M-1:0]  Threshs[C * 2**stage];
 				uwire [$clog2(C)+stage-1:0]  wa = twa[$left(twa):N-stage];
 				uwire [$clog2(C)+stage-1:0]  ra;
 				if(C > 1)  assign  ra[stage+:C_BITS] = pipe[stage].cnl;
@@ -114,7 +114,7 @@ module thresholding #(
 				end
 
 				// Read
-				logic [M-1:0]  RdReg;
+				logic signed [M-1:0]  RdReg;
 				always_ff @(posedge clk) begin
 					if(en)  RdReg <= Threshs[ra];
 				end

From 2bf1a21e463297a885b1a7a40ab78fb2deeb2d52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Mon, 24 Oct 2022 15:38:22 +0100
Subject: [PATCH 010/111] Introduce an optional threshold output bias.

---
 finn-rtllib/thresholding/hdl/thresholding.sv | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 9deeac458c..cea93e40ab 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -48,7 +48,12 @@ module thresholding #(
 	int unsigned  M,  // input/threshold precision
 	int unsigned  C,  // number of channels
 
-	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C)
+	int  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+
+	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
+	localparam int unsigned  O_BITS = BIAS <= 0?
+		/* unsigned */ $clog2(2**N-BIAS) :
+		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
 )(
 	// Global Control
 	input	logic  clk,
@@ -70,7 +75,7 @@ module thresholding #(
 	// Output Stream
 	output	logic  ovld,
 	output	logic [C_BITS-1:0]  ocnl,
-	output	logic [N     -1:0]  odat
+	output	logic [O_BITS-1:0]  odat
 );
 
 	// Pipeline Links & Feed
@@ -148,6 +153,6 @@ module thresholding #(
 	// Output
 	assign	ovld = pipe[N].vld;
 	assign	ocnl = pipe[N].cnl;
-	assign	odat = pipe[N].res;
+	assign	odat = pipe[N].res - BIAS;
 
 endmodule : thresholding

From 4c7b5acd24cf88716fdfdc1dac8d8cc2c2ece44e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 25 Oct 2022 06:17:14 +0100
Subject: [PATCH 011/111] Exposing the thresholding bias through the AXI
 adapter.

---
 finn-rtllib/thresholding/hdl/thresholding_axi.sv | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 71e54c5ca0..a20952c33b 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -36,6 +36,12 @@ module thresholding_axi #(
 	int unsigned  N,	// output precision
 	int unsigned  M,	// input/threshold precision
 	int unsigned  C		// Channels
+
+	int  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+
+	localparam int unsigned  O_BITS = BIAS <= 0?
+		/* unsigned */ $clog2(2**N-BIAS) :
+		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
 )(
 	//- Global Control ------------------
 	input	logic  ap_clk,
@@ -74,7 +80,7 @@ module thresholding_axi #(
 	//- AXI Stream - Output -------------
 	input	logic  m_axis_tready,
 	output	logic  m_axis_tvalid,
-	output	logic [((N+7)/8)*8-1:0]  m_axis_tdata
+	output	logic [((O_BITS+7)/8)*8-1:0]  m_axis_tdata
 );
 	//- Global Control ------------------------------------------------------
 	uwire  clk = ap_clk;
@@ -134,12 +140,12 @@ module thresholding_axi #(
 
 	//- IO-Sandwich with two-stage output buffer for containing a local enable
 	uwire  en;
-	uwire [N-1:0]  odat;
+	uwire [O_BITS-1:0]  odat;
 	uwire  ovld;
 	if(1) begin : blkOutputDecouple
 		typedef struct {
 			logic          vld;
-			logic [N-1:0]  dat;
+			logic [O_BITS-1:0]  dat;
 		} buf_t;
 		buf_t  Buf[2] = '{ default: '{ vld: 0, dat: 'x } };
 		always_ff @(posedge clk) begin
@@ -187,7 +193,7 @@ module thresholding_axi #(
 	end
 
 	// Core Thresholding Module
-	thresholding #(.N(N), .M(M), .C(C)) core (
+	thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS)) core (
 		.clk, .rst,
 		.twe, .twa, .twd,
 		.en,

From 7663d3f60c445ad595a193eb6b493b4f65b2f921 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 25 Oct 2022 11:55:19 +0100
Subject: [PATCH 012/111] Have thresholding wrapper pass on bias parameter and
 compute derived ones.

---
 .../thresholding/hdl/thresholding_axi_wrapper.v       | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index bb6b17b32f..b5c65e5879 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -36,7 +36,12 @@ module thresholding_axi_wrapper #(
 	parameter  N,	// output precision
 	parameter  M,	// input/threshold precision
 	parameter  C,	// Channels
-	parameter  C_BITS //= $clog2(C)
+	parameter  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+
+	localparam  C_BITS = $clog2(C),
+	localparam  O_BITS = BIAS <= 0?
+		/* unsigned */ $clog2(2**N-BIAS) :
+		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
 )(
 	//- Global Control ------------------
 	input	ap_clk,
@@ -75,10 +80,10 @@ module thresholding_axi_wrapper #(
 	//- AXI Stream - Output -------------
 	input	m_axis_tready,
 	output	m_axis_tvalid,
-	output	[((N+7)/8)*8-1:0]  m_axis_tdata
+	output	[((O_BITS+7)/8)*8-1:0]  m_axis_tdata
 );
 
-	thresholding_axi #(.N(N), .M(M), .C(C)) inst (
+	thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS)) inst (
 		//- Global Control ------------------
 		.ap_clk(ap_clk),
 		.ap_rst_n(ap_rst_n),

From 55e2eacd4b554456bb980f7518f9c79d7be3104d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 25 Oct 2022 15:53:11 +0100
Subject: [PATCH 013/111] Fix typo.

---
 finn-rtllib/thresholding/hdl/thresholding_axi.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index a20952c33b..6b869ba303 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -35,7 +35,7 @@
 module thresholding_axi #(
 	int unsigned  N,	// output precision
 	int unsigned  M,	// input/threshold precision
-	int unsigned  C		// Channels
+	int unsigned  C,	// Channels
 
 	int  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 

From fa5d71aaf2b4ba3340aa8e07e23d90bf45bee32d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 25 Oct 2022 16:58:08 +0100
Subject: [PATCH 014/111] Abandon IPI support files.

---
 finn-rtllib/thresholding/component.xml        | 817 ------------------
 .../xgui/thresholding_axi_wrapper_v1_0.tcl    |  74 --
 2 files changed, 891 deletions(-)
 delete mode 100644 finn-rtllib/thresholding/component.xml
 delete mode 100644 finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl

diff --git a/finn-rtllib/thresholding/component.xml b/finn-rtllib/thresholding/component.xml
deleted file mode 100644
index 0a56f93316..0000000000
--- a/finn-rtllib/thresholding/component.xml
+++ /dev/null
@@ -1,817 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<spirit:component xmlns:xilinx="http://www.xilinx.com" xmlns:spirit="http://www.spiritconsortium.org/XMLSchema/SPIRIT/1685-2009" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-  <spirit:vendor>amd.com</spirit:vendor>
-  <spirit:library>user</spirit:library>
-  <spirit:name>thresholding_axi_wrapper</spirit:name>
-  <spirit:version>1.0</spirit:version>
-  <spirit:busInterfaces>
-    <spirit:busInterface>
-      <spirit:name>m_axis</spirit:name>
-      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis" spirit:version="1.0"/>
-      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis_rtl" spirit:version="1.0"/>
-      <spirit:master/>
-      <spirit:portMaps>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>TDATA</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>m_axis_tdata</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>TVALID</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>m_axis_tvalid</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>TREADY</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>m_axis_tready</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-      </spirit:portMaps>
-    </spirit:busInterface>
-    <spirit:busInterface>
-      <spirit:name>s_axis</spirit:name>
-      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis" spirit:version="1.0"/>
-      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis_rtl" spirit:version="1.0"/>
-      <spirit:slave/>
-      <spirit:portMaps>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>TDATA</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axis_tdata</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>TVALID</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axis_tvalid</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>TREADY</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axis_tready</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-      </spirit:portMaps>
-    </spirit:busInterface>
-    <spirit:busInterface>
-      <spirit:name>s_axilite</spirit:name>
-      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm" spirit:version="1.0"/>
-      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm_rtl" spirit:version="1.0"/>
-      <spirit:slave>
-        <spirit:memoryMapRef spirit:memoryMapRef="s_axilite"/>
-      </spirit:slave>
-      <spirit:portMaps>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>AWADDR</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_AWADDR</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>AWVALID</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_AWVALID</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>AWREADY</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_AWREADY</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>WDATA</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_WDATA</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>WSTRB</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_WSTRB</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>WVALID</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_WVALID</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>WREADY</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_WREADY</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>BRESP</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_BRESP</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>BVALID</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_BVALID</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>BREADY</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_BREADY</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>ARADDR</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_ARADDR</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>ARVALID</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_ARVALID</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>ARREADY</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_ARREADY</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>RDATA</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_RDATA</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>RRESP</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_RRESP</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>RVALID</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_RVALID</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>RREADY</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>s_axilite_RREADY</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-      </spirit:portMaps>
-    </spirit:busInterface>
-    <spirit:busInterface>
-      <spirit:name>ap_rst_n</spirit:name>
-      <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset" spirit:version="1.0"/>
-      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset_rtl" spirit:version="1.0"/>
-      <spirit:slave/>
-      <spirit:portMaps>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>RST</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>ap_rst_n</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-      </spirit:portMaps>
-      <spirit:parameters>
-        <spirit:parameter>
-          <spirit:name>POLARITY</spirit:name>
-          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_RST_N.POLARITY" spirit:choiceRef="choice_list_74b5137e">ACTIVE_LOW</spirit:value>
-        </spirit:parameter>
-      </spirit:parameters>
-    </spirit:busInterface>
-    <spirit:busInterface>
-      <spirit:name>ap_clk</spirit:name>
-      <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock" spirit:version="1.0"/>
-      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock_rtl" spirit:version="1.0"/>
-      <spirit:slave/>
-      <spirit:portMaps>
-        <spirit:portMap>
-          <spirit:logicalPort>
-            <spirit:name>CLK</spirit:name>
-          </spirit:logicalPort>
-          <spirit:physicalPort>
-            <spirit:name>ap_clk</spirit:name>
-          </spirit:physicalPort>
-        </spirit:portMap>
-      </spirit:portMaps>
-      <spirit:parameters>
-        <spirit:parameter>
-          <spirit:name>ASSOCIATED_RESET</spirit:name>
-          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_RESET">ap_rst_n</spirit:value>
-        </spirit:parameter>
-        <spirit:parameter>
-          <spirit:name>ASSOCIATED_BUSIF</spirit:name>
-          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_BUSIF">m_axis:s_axis:s_axilite</spirit:value>
-        </spirit:parameter>
-      </spirit:parameters>
-    </spirit:busInterface>
-  </spirit:busInterfaces>
-  <spirit:memoryMaps>
-    <spirit:memoryMap>
-      <spirit:name>s_axilite</spirit:name>
-      <spirit:displayName>s_axilite</spirit:displayName>
-      <spirit:addressBlock>
-        <spirit:name>reg0</spirit:name>
-        <spirit:displayName>reg0</spirit:displayName>
-        <spirit:baseAddress spirit:format="bitString" spirit:bitStringLength="1">0x0</spirit:baseAddress>
-        <spirit:range spirit:format="long" spirit:resolve="dependent" spirit:dependency="pow(2,((spirit:decode(id(&apos;MODELPARAM_VALUE.C_BITS&apos;)) + spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;))) - 1) - 0 + 1)" spirit:minimum="4096" spirit:rangeType="long">4096</spirit:range>
-        <spirit:width spirit:format="long">32</spirit:width>
-        <spirit:usage>register</spirit:usage>
-      </spirit:addressBlock>
-    </spirit:memoryMap>
-  </spirit:memoryMaps>
-  <spirit:model>
-    <spirit:views>
-      <spirit:view>
-        <spirit:name>xilinx_anylanguagesynthesis</spirit:name>
-        <spirit:displayName>Synthesis</spirit:displayName>
-        <spirit:envIdentifier>:vivado.xilinx.com:synthesis</spirit:envIdentifier>
-        <spirit:language>Verilog</spirit:language>
-        <spirit:modelName>thresholding_axi_wrapper</spirit:modelName>
-        <spirit:fileSetRef>
-          <spirit:localName>xilinx_anylanguagesynthesis_view_fileset</spirit:localName>
-        </spirit:fileSetRef>
-        <spirit:parameters>
-          <spirit:parameter>
-            <spirit:name>viewChecksum</spirit:name>
-            <spirit:value>5cc8f7a9</spirit:value>
-          </spirit:parameter>
-        </spirit:parameters>
-      </spirit:view>
-      <spirit:view>
-        <spirit:name>xilinx_xpgui</spirit:name>
-        <spirit:displayName>UI Layout</spirit:displayName>
-        <spirit:envIdentifier>:vivado.xilinx.com:xgui.ui</spirit:envIdentifier>
-        <spirit:fileSetRef>
-          <spirit:localName>xilinx_xpgui_view_fileset</spirit:localName>
-        </spirit:fileSetRef>
-        <spirit:parameters>
-          <spirit:parameter>
-            <spirit:name>viewChecksum</spirit:name>
-            <spirit:value>c456596c</spirit:value>
-          </spirit:parameter>
-        </spirit:parameters>
-      </spirit:view>
-    </spirit:views>
-    <spirit:ports>
-      <spirit:port>
-        <spirit:name>ap_clk</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>ap_rst_n</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_AWVALID</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_AWREADY</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_AWADDR</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:vector>
-            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="((spirit:decode(id(&apos;MODELPARAM_VALUE.C_BITS&apos;)) + spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;))) - 1)">3</spirit:left>
-            <spirit:right spirit:format="long">0</spirit:right>
-          </spirit:vector>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic_vector</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_WVALID</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_WREADY</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_WDATA</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:vector>
-            <spirit:left spirit:format="long">31</spirit:left>
-            <spirit:right spirit:format="long">0</spirit:right>
-          </spirit:vector>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic_vector</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_WSTRB</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:vector>
-            <spirit:left spirit:format="long">3</spirit:left>
-            <spirit:right spirit:format="long">0</spirit:right>
-          </spirit:vector>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic_vector</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">1</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_BVALID</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_BREADY</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_BRESP</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:vector>
-            <spirit:left spirit:format="long">1</spirit:left>
-            <spirit:right spirit:format="long">0</spirit:right>
-          </spirit:vector>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic_vector</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_ARVALID</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_ARREADY</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_ARADDR</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:vector>
-            <spirit:left spirit:format="long">0</spirit:left>
-            <spirit:right spirit:format="long">0</spirit:right>
-          </spirit:vector>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic_vector</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_RVALID</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_RREADY</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_RDATA</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:vector>
-            <spirit:left spirit:format="long">31</spirit:left>
-            <spirit:right spirit:format="long">0</spirit:right>
-          </spirit:vector>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic_vector</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axilite_RRESP</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:vector>
-            <spirit:left spirit:format="long">1</spirit:left>
-            <spirit:right spirit:format="long">0</spirit:right>
-          </spirit:vector>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic_vector</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axis_tready</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axis_tvalid</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>s_axis_tdata</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:vector>
-            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="((((spirit:decode(id(&apos;MODELPARAM_VALUE.M&apos;)) + 7) / 8) * 8) - 1)">15</spirit:left>
-            <spirit:right spirit:format="long">0</spirit:right>
-          </spirit:vector>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic_vector</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>m_axis_tready</spirit:name>
-        <spirit:wire>
-          <spirit:direction>in</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-          <spirit:driver>
-            <spirit:defaultValue spirit:format="long">1</spirit:defaultValue>
-          </spirit:driver>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>m_axis_tvalid</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-      <spirit:port>
-        <spirit:name>m_axis_tdata</spirit:name>
-        <spirit:wire>
-          <spirit:direction>out</spirit:direction>
-          <spirit:vector>
-            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="((((spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;)) + 7) / 8) * 8) - 1)">7</spirit:left>
-            <spirit:right spirit:format="long">0</spirit:right>
-          </spirit:vector>
-          <spirit:wireTypeDefs>
-            <spirit:wireTypeDef>
-              <spirit:typeName>std_logic_vector</spirit:typeName>
-              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
-            </spirit:wireTypeDef>
-          </spirit:wireTypeDefs>
-        </spirit:wire>
-      </spirit:port>
-    </spirit:ports>
-    <spirit:modelParameters>
-      <spirit:modelParameter xsi:type="spirit:nameValueTypeType">
-        <spirit:name>N</spirit:name>
-        <spirit:displayName>N</spirit:displayName>
-        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.N">4</spirit:value>
-      </spirit:modelParameter>
-      <spirit:modelParameter>
-        <spirit:name>M</spirit:name>
-        <spirit:displayName>M</spirit:displayName>
-        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.M">16</spirit:value>
-      </spirit:modelParameter>
-      <spirit:modelParameter>
-        <spirit:name>C</spirit:name>
-        <spirit:displayName>C</spirit:displayName>
-        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.C">1</spirit:value>
-      </spirit:modelParameter>
-      <spirit:modelParameter>
-        <spirit:name>C_BITS</spirit:name>
-        <spirit:displayName>C Bits</spirit:displayName>
-        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.C_BITS">0</spirit:value>
-      </spirit:modelParameter>
-    </spirit:modelParameters>
-  </spirit:model>
-  <spirit:choices>
-    <spirit:choice>
-      <spirit:name>choice_list_74b5137e</spirit:name>
-      <spirit:enumeration>ACTIVE_HIGH</spirit:enumeration>
-      <spirit:enumeration>ACTIVE_LOW</spirit:enumeration>
-    </spirit:choice>
-  </spirit:choices>
-  <spirit:fileSets>
-    <spirit:fileSet>
-      <spirit:name>xilinx_anylanguagesynthesis_view_fileset</spirit:name>
-      <spirit:file>
-        <spirit:name>hdl/thresholding.sv</spirit:name>
-        <spirit:fileType>systemVerilogSource</spirit:fileType>
-      </spirit:file>
-      <spirit:file>
-        <spirit:name>hdl/thresholding_axi.sv</spirit:name>
-        <spirit:fileType>systemVerilogSource</spirit:fileType>
-      </spirit:file>
-      <spirit:file>
-        <spirit:name>hdl/thresholding_axi_wrapper.v</spirit:name>
-        <spirit:fileType>verilogSource</spirit:fileType>
-        <spirit:userFileType>CHECKSUM_2ec027ae</spirit:userFileType>
-      </spirit:file>
-    </spirit:fileSet>
-    <spirit:fileSet>
-      <spirit:name>xilinx_xpgui_view_fileset</spirit:name>
-      <spirit:file>
-        <spirit:name>xgui/thresholding_axi_wrapper_v1_0.tcl</spirit:name>
-        <spirit:fileType>tclSource</spirit:fileType>
-        <spirit:userFileType>CHECKSUM_c456596c</spirit:userFileType>
-        <spirit:userFileType>XGUI_VERSION_2</spirit:userFileType>
-      </spirit:file>
-    </spirit:fileSet>
-  </spirit:fileSets>
-  <spirit:description>thresholding_axi_wrapper_v1_0</spirit:description>
-  <spirit:parameters>
-    <spirit:parameter>
-      <spirit:name>N</spirit:name>
-      <spirit:displayName>N</spirit:displayName>
-      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.N">4</spirit:value>
-    </spirit:parameter>
-    <spirit:parameter>
-      <spirit:name>M</spirit:name>
-      <spirit:displayName>M</spirit:displayName>
-      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.M">16</spirit:value>
-    </spirit:parameter>
-    <spirit:parameter>
-      <spirit:name>C</spirit:name>
-      <spirit:displayName>C</spirit:displayName>
-      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.C">1</spirit:value>
-    </spirit:parameter>
-    <spirit:parameter>
-      <spirit:name>C_BITS</spirit:name>
-      <spirit:displayName>C_BITS</spirit:displayName>
-      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.C_BITS">0</spirit:value>
-    </spirit:parameter>
-    <spirit:parameter>
-      <spirit:name>Component_Name</spirit:name>
-      <spirit:value spirit:resolve="user" spirit:id="PARAM_VALUE.Component_Name" spirit:order="1">thresholding_axi_wrapper_v1_0</spirit:value>
-    </spirit:parameter>
-  </spirit:parameters>
-  <spirit:vendorExtensions>
-    <xilinx:coreExtensions>
-      <xilinx:supportedFamilies>
-        <xilinx:family xilinx:lifeCycle="Production">virtex7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">qvirtex7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">versal</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">kintex7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">kintex7l</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">qkintex7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">qkintex7l</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">akintex7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">artix7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">artix7l</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">aartix7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">qartix7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">zynq</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">qzynq</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">azynq</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">spartan7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">aspartan7</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">virtexu</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">zynquplus</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">virtexuplusHBM</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">virtexuplus58g</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">kintexuplus</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">artixuplus</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">kintexu</xilinx:family>
-      </xilinx:supportedFamilies>
-      <xilinx:taxonomies>
-        <xilinx:taxonomy>/UserIP</xilinx:taxonomy>
-      </xilinx:taxonomies>
-      <xilinx:displayName>thresholding_axi_wrapper_v1_0</xilinx:displayName>
-      <xilinx:definitionSource>package_project</xilinx:definitionSource>
-      <xilinx:vendorDisplayName>AMD</xilinx:vendorDisplayName>
-      <xilinx:coreRevision>2</xilinx:coreRevision>
-      <xilinx:coreCreationDateTime>2022-09-20T12:31:16Z</xilinx:coreCreationDateTime>
-    </xilinx:coreExtensions>
-    <xilinx:packagingInfo>
-      <xilinx:xilinxVersion>2022.1</xilinx:xilinxVersion>
-      <xilinx:checksum xilinx:scope="busInterfaces" xilinx:value="e262c422"/>
-      <xilinx:checksum xilinx:scope="memoryMaps" xilinx:value="d6ddd21a"/>
-      <xilinx:checksum xilinx:scope="fileGroups" xilinx:value="3302678a"/>
-      <xilinx:checksum xilinx:scope="ports" xilinx:value="c5010d89"/>
-      <xilinx:checksum xilinx:scope="hdlParameters" xilinx:value="db16a7df"/>
-      <xilinx:checksum xilinx:scope="parameters" xilinx:value="17d48459"/>
-    </xilinx:packagingInfo>
-  </spirit:vendorExtensions>
-</spirit:component>
diff --git a/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl b/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl
deleted file mode 100644
index 02c373e8f2..0000000000
--- a/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl
+++ /dev/null
@@ -1,74 +0,0 @@
-# Definitional proc to organize widgets for parameters.
-proc init_gui { IPINST } {
-  ipgui::add_param $IPINST -name "Component_Name"
-  #Adding Page
-  set Page_0 [ipgui::add_page $IPINST -name "Page 0"]
-  set C [ipgui::add_param $IPINST -name "C" -parent ${Page_0}]
-  set_property tooltip {Channel Count} ${C}
-  set C_BITS [ipgui::add_param $IPINST -name "C_BITS" -parent ${Page_0}]
-  set_property tooltip {Must be clog2(C)} ${C_BITS}
-  set M [ipgui::add_param $IPINST -name "M" -parent ${Page_0}]
-  set_property tooltip {Input Precision} ${M}
-  set N [ipgui::add_param $IPINST -name "N" -parent ${Page_0}]
-  set_property tooltip {Output Precision} ${N}
-
-
-}
-
-proc update_PARAM_VALUE.C { PARAM_VALUE.C } {
-	# Procedure called to update C when any of the dependent parameters in the arguments change
-}
-
-proc validate_PARAM_VALUE.C { PARAM_VALUE.C } {
-	# Procedure called to validate C
-	return true
-}
-
-proc update_PARAM_VALUE.C_BITS { PARAM_VALUE.C_BITS } {
-	# Procedure called to update C_BITS when any of the dependent parameters in the arguments change
-}
-
-proc validate_PARAM_VALUE.C_BITS { PARAM_VALUE.C_BITS } {
-	# Procedure called to validate C_BITS
-	return true
-}
-
-proc update_PARAM_VALUE.M { PARAM_VALUE.M } {
-	# Procedure called to update M when any of the dependent parameters in the arguments change
-}
-
-proc validate_PARAM_VALUE.M { PARAM_VALUE.M } {
-	# Procedure called to validate M
-	return true
-}
-
-proc update_PARAM_VALUE.N { PARAM_VALUE.N } {
-	# Procedure called to update N when any of the dependent parameters in the arguments change
-}
-
-proc validate_PARAM_VALUE.N { PARAM_VALUE.N } {
-	# Procedure called to validate N
-	return true
-}
-
-
-proc update_MODELPARAM_VALUE.N { MODELPARAM_VALUE.N PARAM_VALUE.N } {
-	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
-	set_property value [get_property value ${PARAM_VALUE.N}] ${MODELPARAM_VALUE.N}
-}
-
-proc update_MODELPARAM_VALUE.M { MODELPARAM_VALUE.M PARAM_VALUE.M } {
-	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
-	set_property value [get_property value ${PARAM_VALUE.M}] ${MODELPARAM_VALUE.M}
-}
-
-proc update_MODELPARAM_VALUE.C { MODELPARAM_VALUE.C PARAM_VALUE.C } {
-	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
-	set_property value [get_property value ${PARAM_VALUE.C}] ${MODELPARAM_VALUE.C}
-}
-
-proc update_MODELPARAM_VALUE.C_BITS { MODELPARAM_VALUE.C_BITS PARAM_VALUE.C_BITS } {
-	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
-	set_property value [get_property value ${PARAM_VALUE.C_BITS}] ${MODELPARAM_VALUE.C_BITS}
-}
-

From 174c0ffe1d0614dd14013de1b073469d79c9191e Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 15 Nov 2022 17:59:23 +0000
Subject: [PATCH 015/111] [thresholding] allow for positive and negative bias
 values

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding.sv            | 4 ++--
 finn-rtllib/thresholding/hdl/thresholding_axi.sv        | 2 +-
 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index cea93e40ab..a99c752e17 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -51,7 +51,7 @@ module thresholding #(
 	int  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
 	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
-	localparam int unsigned  O_BITS = BIAS <= 0?
+	localparam int unsigned  O_BITS = BIAS > 0?
 		/* unsigned */ $clog2(2**N-BIAS) :
 		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
 )(
@@ -153,6 +153,6 @@ module thresholding #(
 	// Output
 	assign	ovld = pipe[N].vld;
 	assign	ocnl = pipe[N].cnl;
-	assign	odat = pipe[N].res - BIAS;
+	assign	odat = pipe[N].res + BIAS;
 
 endmodule : thresholding
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 6b869ba303..795683da1d 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -39,7 +39,7 @@ module thresholding_axi #(
 
 	int  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
-	localparam int unsigned  O_BITS = BIAS <= 0?
+	localparam int unsigned  O_BITS = BIAS > 0?
 		/* unsigned */ $clog2(2**N-BIAS) :
 		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
 )(
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index b5c65e5879..6bfc2f57a4 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -39,7 +39,7 @@ module thresholding_axi_wrapper #(
 	parameter  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
 	localparam  C_BITS = $clog2(C),
-	localparam  O_BITS = BIAS <= 0?
+	localparam  O_BITS = BIAS > 0?
 		/* unsigned */ $clog2(2**N-BIAS) :
 		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
 )(

From 2ec20e5cab8c821d7dc6d652564e85eb1bc84b6b Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 15 Nov 2022 18:00:52 +0000
Subject: [PATCH 016/111] [thresholding] pass bias from top module to
 thresholding.sv core

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding.sv            | 2 +-
 finn-rtllib/thresholding/hdl/thresholding_axi.sv        | 2 +-
 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index a99c752e17..f9763af96c 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -48,7 +48,7 @@ module thresholding #(
 	int unsigned  M,  // input/threshold precision
 	int unsigned  C,  // number of channels
 
-	int  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+	int BIAS,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
 	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
 	localparam int unsigned  O_BITS = BIAS > 0?
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 795683da1d..e4f3feac3f 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -37,7 +37,7 @@ module thresholding_axi #(
 	int unsigned  M,	// input/threshold precision
 	int unsigned  C,	// Channels
 
-	int  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+	int BIAS,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
 	localparam int unsigned  O_BITS = BIAS > 0?
 		/* unsigned */ $clog2(2**N-BIAS) :
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index 6bfc2f57a4..1b5921d8ba 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -36,7 +36,7 @@ module thresholding_axi_wrapper #(
 	parameter  N,	// output precision
 	parameter  M,	// input/threshold precision
 	parameter  C,	// Channels
-	parameter  BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+	int BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
 	localparam  C_BITS = $clog2(C),
 	localparam  O_BITS = BIAS > 0?

From 861614837dd187dc58ab24af0b5d0cd2050c76e6 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 15 Nov 2022 18:07:56 +0000
Subject: [PATCH 017/111] [thresholding] pass O_BITS from top module to
 thresholding.sv core

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding.sv            | 4 +---
 finn-rtllib/thresholding/hdl/thresholding_axi.sv        | 6 ++----
 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 4 ++--
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index f9763af96c..04116e995c 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -51,9 +51,7 @@ module thresholding #(
 	int BIAS,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
 	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
-	localparam int unsigned  O_BITS = BIAS > 0?
-		/* unsigned */ $clog2(2**N-BIAS) :
-		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
+	int unsigned O_BITS
 )(
 	// Global Control
 	input	logic  clk,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index e4f3feac3f..a7eec445e0 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -39,9 +39,7 @@ module thresholding_axi #(
 
 	int BIAS,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
-	localparam int unsigned  O_BITS = BIAS > 0?
-		/* unsigned */ $clog2(2**N-BIAS) :
-		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
+	int unsigned O_BITS
 )(
 	//- Global Control ------------------
 	input	logic  ap_clk,
@@ -193,7 +191,7 @@ module thresholding_axi #(
 	end
 
 	// Core Thresholding Module
-	thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS)) core (
+	thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) core (
 		.clk, .rst,
 		.twe, .twa, .twd,
 		.en,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index 1b5921d8ba..5c43a70445 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -39,7 +39,7 @@ module thresholding_axi_wrapper #(
 	int BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
 	localparam  C_BITS = $clog2(C),
-	localparam  O_BITS = BIAS > 0?
+	parameter  O_BITS = BIAS > 0?
 		/* unsigned */ $clog2(2**N-BIAS) :
 		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
 )(
@@ -83,7 +83,7 @@ module thresholding_axi_wrapper #(
 	output	[((O_BITS+7)/8)*8-1:0]  m_axis_tdata
 );
 
-	thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS)) inst (
+	thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst (
 		//- Global Control ------------------
 		.ap_clk(ap_clk),
 		.ap_rst_n(ap_rst_n),

From 275abaddee9504360c1589565036611bab5955da Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 15 Nov 2022 18:10:12 +0000
Subject: [PATCH 018/111] [thresholding] pass C_BITS from top module to
 thresholding.sv core

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding.sv            | 2 +-
 finn-rtllib/thresholding/hdl/thresholding_axi.sv        | 2 +-
 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 04116e995c..70f94f1c22 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -50,7 +50,7 @@ module thresholding #(
 
 	int BIAS,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
-	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
+	int unsigned  C_BITS,
 	int unsigned O_BITS
 )(
 	// Global Control
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index a7eec445e0..fac69b33fc 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -191,7 +191,7 @@ module thresholding_axi #(
 	end
 
 	// Core Thresholding Module
-	thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) core (
+	thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core (
 		.clk, .rst,
 		.twe, .twa, .twd,
 		.en,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index 5c43a70445..588f9e4852 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -38,7 +38,7 @@ module thresholding_axi_wrapper #(
 	parameter  C,	// Channels
 	int BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
-	localparam  C_BITS = $clog2(C),
+	parameter  C_BITS = C < 2 ? 1 : $clog2(C),
 	parameter  O_BITS = BIAS > 0?
 		/* unsigned */ $clog2(2**N-BIAS) :
 		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)

From 8849c026b780c152dd51c0e007c5f72bdca4808c Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 09:31:20 +0000
Subject: [PATCH 019/111] [thresholding] create & fill in RTL template values
 using FINN

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding.sv  | 16 +--
 .../thresholding/hdl/thresholding_axi.sv      |  6 +-
 .../hdl/thresholding_axi_wrapper.v            | 14 +--
 .../thresholding_binary_search.py             | 99 +++++++++++++++++++
 4 files changed, 117 insertions(+), 18 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 70f94f1c22..25d6ff3112 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -43,7 +43,7 @@
  *  threshold configuration relies on a channel address prefix. Inputs are
  *  accompanied by a channel selector.
  *****************************************************************************/
-module thresholding #(
+module $MODULE_NAME$ #(
 	int unsigned  N,  // output precision
 	int unsigned  M,  // input/threshold precision
 	int unsigned  C,  // number of channels
@@ -68,7 +68,7 @@ module thresholding #(
 	// Input Stream
 	input	logic  ivld,
 	input	logic        [C_BITS-1:0]  icnl,	// Ignored for C == 1
-	input	logic signed [M     -1:0]  idat,
+	input	logic $SIGN$ [M     -1:0]  idat,
 
 	// Output Stream
 	output	logic  ovld,
@@ -80,7 +80,7 @@ module thresholding #(
 	typedef struct packed {
 		logic                      vld;	// Valid data identification
 		logic        [C_BITS-1:0]  cnl;	// Channel
-		logic signed [M     -1:0]  val;	// Original input value
+		logic $SIGN$ [M     -1:0]  val;	// Original input value
 		logic        [0:N-1]       res;	// Assembling result with valid prefix [0:stage] after stage #stage
 	} pipe_t;
 	uwire pipe_t  pipe[0:N];
@@ -91,13 +91,13 @@ module thresholding #(
 	for(genvar  stage = 0; stage < N; stage++) begin : genStages
 
 		// Threshold Memory
-		uwire signed [M-1:0]  thresh;
+		uwire $SIGN$ [M-1:0]  thresh;
 		if(1) begin : blkUpdate
 
 			// Write control: local select from global address
 			uwire  we = twe && tws[stage];
 			if((C == 1) && (stage == 0)) begin
-				logic signed [M-1:0]  Thresh = 'x;
+				logic $SIGN$ [M-1:0]  Thresh = 'x;
 				always_ff @(posedge clk) begin
 					if(rst)      Thresh <= 'x;
 					else if(we)  Thresh <= twd;
@@ -105,7 +105,7 @@ module thresholding #(
 				assign  thresh = Thresh;
 			end
 			else begin
-				logic signed [M-1:0]  Threshs[C * 2**stage];
+				logic $SIGN$ [M-1:0]  Threshs[C * 2**stage];
 				uwire [$clog2(C)+stage-1:0]  wa = twa[$left(twa):N-stage];
 				uwire [$clog2(C)+stage-1:0]  ra;
 				if(C > 1)  assign  ra[stage+:C_BITS] = pipe[stage].cnl;
@@ -117,7 +117,7 @@ module thresholding #(
 				end
 
 				// Read
-				logic signed [M-1:0]  RdReg;
+				logic $SIGN$ [M-1:0]  RdReg;
 				always_ff @(posedge clk) begin
 					if(en)  RdReg <= Threshs[ra];
 				end
@@ -153,4 +153,4 @@ module thresholding #(
 	assign	ocnl = pipe[N].cnl;
 	assign	odat = pipe[N].res + BIAS;
 
-endmodule : thresholding
+endmodule : $MODULE_NAME$
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index fac69b33fc..97cdfd3e12 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -32,7 +32,7 @@
  * @author	Thomas B. Preußer <tpreusse@amd.com>
  *****************************************************************************/
 
-module thresholding_axi #(
+module $MODULE_NAME_AXI$ #(
 	int unsigned  N,	// output precision
 	int unsigned  M,	// input/threshold precision
 	int unsigned  C,	// Channels
@@ -191,7 +191,7 @@ module thresholding_axi #(
 	end
 
 	// Core Thresholding Module
-	thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core (
+	$MODULE_NAME$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core (
 		.clk, .rst,
 		.twe, .twa, .twd,
 		.en,
@@ -199,4 +199,4 @@ module thresholding_axi #(
 		.ovld, .ocnl(), .odat
 	);
 
-endmodule : thresholding_axi
+endmodule : $MODULE_NAME_AXI$
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index 588f9e4852..e3f8596bc8 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -32,11 +32,11 @@
  * @author	Thomas B. Preußer <tpreusse@amd.com>
  *****************************************************************************/
 
-module thresholding_axi_wrapper #(
-	parameter  N,	// output precision
-	parameter  M,	// input/threshold precision
-	parameter  C,	// Channels
-	int BIAS = 0,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+module $MODULE_NAME_AXI_WRAPPER$ #(
+	parameter  N = $N$,	// output precision
+	parameter  M = $M$,	// input/threshold precision
+	parameter  C = $C$,	// Channels
+	int BIAS = $BIAS$,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
 	parameter  C_BITS = C < 2 ? 1 : $clog2(C),
 	parameter  O_BITS = BIAS > 0?
@@ -83,7 +83,7 @@ module thresholding_axi_wrapper #(
 	output	[((O_BITS+7)/8)*8-1:0]  m_axis_tdata
 );
 
-	thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst (
+	$MODULE_NAME_AXI$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst (
 		//- Global Control ------------------
 		.ap_clk(ap_clk),
 		.ap_rst_n(ap_rst_n),
@@ -124,4 +124,4 @@ module thresholding_axi_wrapper #(
 		.m_axis_tdata(m_axis_tdata)
 	);
 
-endmodule : thresholding_axi_wrapper
+endmodule : $MODULE_NAME_AXI_WRAPPER$
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index ee74f28485..d546d52843 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -26,6 +26,7 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import os
 from qonnx.core.datatype import DataType
 
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -85,6 +86,7 @@ def get_nodeattr_types(self):
             # weight data from the weight FIFOs.
             "runtime_writeable_weights": ("i", False, 0, {0, 1}),
             "gen_top_module": ("s", False, ""),
+            "activation_bias": ("i", False, 0),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
@@ -174,6 +176,103 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
         """
         return
 
+    # Get the integer from the DataType and string-ify it
+    # This assumes that the data is in the form "INTx" or similar
+    def conv_datatype_to_str(self, data_type):
+        # Handle the case that an int is passed to the function
+        if isinstance(data_type, int):
+            return str(data_type)
+        return str(DataType[data_type].bitwidth())
+
+    def prepare_codegen_rtl_values(self):
+        """All dictionary values produced in this function are to replace
+        their key value(s) in the RTL template files"""
+        code_gen_dict = {}
+
+        # Identify the module names
+        code_gen_dict["$MODULE_NAME$"] = [self.get_verilog_top_module_name()]
+        code_gen_dict["$MODULE_NAME_AXI$"] = [self.get_verilog_top_module_name() + "_axi"]
+        code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [self.get_verilog_top_module_name() + "_axi_wrapper"]
+        # Set the top module name - AXI wrapper
+        code_gen_dict["$TOP_MODULE$"] = code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"]
+
+        # Identify the module variables
+        output_data_type = self.get_nodeattr("outputDataType") # output precision
+        input_data_type = self.get_nodeattr("inputDataType") # input/threshold precision
+        num_channels = self.get_nodeattr("NumChannels") # number of channels
+        bias = self.get_nodeattr("activation_bias") # activation bias value
+
+        code_gen_dict["$N$"] = [self.conv_datatype_to_str(output_data_type)] # output precision
+        code_gen_dict["$M$"] = [self.conv_datatype_to_str(input_data_type)] # input/threshold precision
+        code_gen_dict["$C$"] = [self.conv_datatype_to_str(num_channels)] # number of channels
+        code_gen_dict["$BIAS$"] = [self.conv_datatype_to_str(bias)] # activation bias value
+
+        # Is the input datatype signed or unsigned? The thresholding core needs to know this
+        if self.get_input_datatype().min() < 0:
+            code_gen_dict["$SIGN$"] = ["signed"]
+        else:
+            code_gen_dict["$SIGN$"] = ["unsigned"]
+
+        return code_gen_dict
+
+    def get_rtl_file_list(self):
+        return ["thresholding.sv",
+                "thresholding_axi.sv",
+                "thresholding_axi_wrapper.v"]
+
+    def get_rtl_file_paths(self):
+        rtl_root_dir = os.environ["FINN_ROOT"] + "/finn-rtllib/thresholding/hdl/"
+        rtl_file_list = self.get_rtl_file_list()
+        rtl_file_paths = [rtl_root_dir + file for file in rtl_file_list]
+        return rtl_file_paths
+
+    def get_rtl_template_data(self, path):
+        with open(path, "r") as f:
+            template = f.read()
+        return template
+
+    def fill_in_rtl_template_data(self, replace_dict, template_data):
+        template_data_cp = template_data
+        for key in replace_dict:
+            replacement_line = "\n".join(replace_dict[key])
+            template_data_cp = template_data_cp.replace(key, replacement_line)
+        return template_data_cp
+
+    def dump_rtl_data(self, dest_dir, filename, data):
+        with open(os.path.join(dest_dir, filename), "w") as f:
+            f.write(data)
+        return
+
+    def generate_hdl(self):
+        # Generate a dictionary of values to put in RTL template
+        code_gen_dict = self.prepare_codegen_rtl_values()
+
+        # Retrieve the destination directory for the final RTL files
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        for rtl_file_path in self.get_rtl_file_paths():
+            # read in original RTL template file
+            template_data = self.get_rtl_template_data(rtl_file_path)
+            # apply code generation to templates
+            data = self.fill_in_rtl_template_data(code_gen_dict, template_data)
+            # dump filled-in template to destination directory for compilation
+            file_only_path = rtl_file_path.split('/')[-1]
+            self.dump_rtl_data(code_gen_dir, file_only_path, data)
+
+        # Before we return - set the 'gen_top_module' attribute for use later by PyVerilator and IPI generation
+        self.set_nodeattr("gen_top_module", code_gen_dict["$TOP_MODULE$"][0])
+        return
+
+    def code_generation_ipgen(self, model, fpgapart, clk):
+        self.generate_hdl()
+
+        # set ipgen_path and ip_path so that HLS-Synth transformation
+        # and stich_ip transformation do not complain
+        # i.e. during the HLSSynthIP() transformation
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        self.set_nodeattr("ipgen_path", code_gen_dir)
+        self.set_nodeattr("ip_path", code_gen_dir)
+
     def generate_params(self, model, path):
         return
 

From 84704edd5aa7e53351819238f96d4c63dfb45d07 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 09:45:29 +0000
Subject: [PATCH 020/111] [thresholding] add method get_weightstream_width()

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../custom_op/fpgadataflow/thresholding_binary_search.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index d546d52843..54fa2def1e 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -131,7 +131,14 @@ def get_outstream_width(self):
         return 0
 
     def get_weightstream_width(self):
-        return 0
+        # Only 'decoupled' mode is supported
+        mem_mode = self.get_nodeattr("mem_mode")
+        if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode))
+        pe = self.get_nodeattr("PE")
+        wp = self.get_weight_datatype().bitwidth()
+        n_thres_steps = self.get_nodeattr("numSteps")
+        w_width = pe * wp * n_thres_steps
+        return w_width
 
     def get_folded_input_shape(self):
         fold = self.calc_tmem()

From 9aa7ff3f8c1a0584afd8684e9280d77aada43105 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 09:48:56 +0000
Subject: [PATCH 021/111] [thresholding] add method get_in/output_width()

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../custom_op/fpgadataflow/thresholding_binary_search.py    | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 54fa2def1e..a1b75b3de1 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -125,10 +125,12 @@ def minimize_accumulator_width(self, model):
         return None
 
     def get_instream_width(self):
-        return 0
+        i_bits = self.get_input_datatype().bitwidth()
+        return i_bits * self.get_nodeattr("PE")
 
     def get_outstream_width(self):
-        return 0
+        o_bits = self.get_output_datatype().bitwidth()
+        return o_bits * self.get_nodeattr("PE")
 
     def get_weightstream_width(self):
         # Only 'decoupled' mode is supported

From 608b5da9222e2ede4792c487dc4d77fb5ef02e16 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 09:51:10 +0000
Subject: [PATCH 022/111] [thresholding] add method body for
 code_generation_ipi()

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index a1b75b3de1..4ca651be76 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -289,7 +289,32 @@ def execute_node(self, context, graph):
         return
 
     def code_generation_ipi(self):
-        return []
+        """Constructs and returns the TCL commands for node instantiation as an RTL block."""
+        cmd = []
+        rtl_file_list = self.get_rtl_file_list()
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        for rtl_file in rtl_file_list:
+            cmd.append("add_files -norecurse %s"
+            % (
+                os.path.join(
+                    code_gen_dir, rtl_file
+                )
+            ))
+
+        # Create an RTL block, not an IP core (-type ip)
+        cmd.append("create_bd_cell -type module -reference %s %s"
+            % (self.get_nodeattr("gen_top_module"), self.onnx_node.name))
+
+        # Workaround for ERROR [BD 41-237] (FREQ_HZ mismatch, e.g. s_axis at 100000000 vs.
+        # StreamingFIFO out_V at 200000000): force FREQ_HZ on this cell's own s_axis pin.
+        cmd.append("set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins %s/s_axis]" % self.onnx_node.name)
+
+        # Same workaround for the m_axis pin (m_axis at 100000000 vs. StreamingFIFO in0_V at
+        # 200000000). NOTE(review): the 200 MHz value is still hard-coded - confirm target clock.
+        cmd.append("set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins %s/m_axis]" % self.onnx_node.name)
+
+        return cmd
 
     def global_includes(self):
         pass

From ca6e7e745c4ad810ac824ee3b6ccd55bdb6f724d Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 09:56:01 +0000
Subject: [PATCH 023/111] [thresholding] add method
 get_verilog_top_module_intf_names()

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 4ca651be76..5dac98ad66 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -316,6 +316,28 @@ def code_generation_ipi(self):
 
         return cmd
 
+    def get_verilog_top_module_intf_names(self):
+        """Return a dict of names of input and output interfaces.
+        The keys reflect the protocols each interface implements:
+        'clk', 'rst', 'm_axis', 's_axis', 'aximm', 'axilite'.
+        Values are lists of tuples (axis, aximm) or names (axilite):
+        'axis' tuples correspond to the list of node inputs in order,
+        each tuple is (interface_name, interface_width_bits).
+        axilite always assumed to be 32 bits and is not tuple (name only).
+        Each block must have at most one aximm and one axilite."""
+
+        intf_names = super().get_verilog_top_module_intf_names()
+        # Only 'decoupled' mode is supported - check before adding axilite interface
+        mem_mode = self.get_nodeattr("mem_mode")
+        if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode))
+        intf_names["axilite"] = ["s_axilite"]
+        intf_names["s_axis"] = [["s_axis"]]
+        intf_names["m_axis"] = [["m_axis"]]
+
+        self.set_nodeattr("runtime_writeable_weights", 1)
+
+        return intf_names
+
     def global_includes(self):
         pass
 

From 7266ee91af50a149d1d8310401e2a4134cdac18c Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 10:41:14 +0000
Subject: [PATCH 024/111] [thresholding] retrieve axilite write sequence for
 runtime weight programming

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 5dac98ad66..07b675f0f3 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -338,6 +338,59 @@ def get_verilog_top_module_intf_names(self):
 
         return intf_names
 
+    def find_next_power_of_2(self, n):
+        # Negative values will loop infinitely below - return 0
+        if n <= 0:
+            return 0
+        # If '1' is requested, output will be '0' in the loop below, so avoid this earlier.
+        elif n == 1:
+            return 2 # i.e. 2**1
+
+        # decrement 'n' (to handle cases when `n` itself is a power of 2)
+        n = n - 1
+
+        # loop until only one bit is left
+        while n & n - 1:
+            # unset rightmost bit
+            n = n & n - 1
+        return n << 1
+
+    def twos_comp(self, val, bitwidth):
+        return (val + (1 << bitwidth)) % (1 << bitwidth)
+
+    def prep_axilite_val(self, val):
+        return self.twos_comp(int(val), self.get_weight_datatype().bitwidth())
+
+    def get_dynamic_config(self, model, address_stride=1):
+        ## TODO - not sure this description is correct
+        """Returns a configuration dictionary containing axilite write commands
+        in order to program the thresholds into the RTL core during runtime.
+        The default address stride for the weights is 1 byte."""
+
+        thresholds = model.get_initializer(self.onnx_node.input[1])
+        num_channels, num_weights_per_channel = thresholds.shape
+
+        weight_addr_boundary = self.find_next_power_of_2(num_weights_per_channel)
+        # Make sure that the next power of 2 (output) is greater than the input
+        assert weight_addr_boundary >= num_weights_per_channel
+
+        config = {}
+        channel_cntr = 0
+        for channel in thresholds:
+            channel_start_addr = (channel_cntr * weight_addr_boundary * address_stride)
+            weight_cntr = 0
+            addr = 0
+            for weight in channel:
+                key_name = "{}_{}{}_{}{}".format("axilite", "ch", str(channel_cntr), "w", str(weight_cntr))
+                config[key_name] = (channel_start_addr + addr, self.prep_axilite_val(weight))
+
+                weight_cntr += 1
+                addr += address_stride
+
+            channel_cntr += 1
+
+        return config
+
     def global_includes(self):
         pass
 

From f88bdbfeb4ade334740d29fa81f6a83174635ad2 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 11:06:39 +0000
Subject: [PATCH 025/111] [thresholding] add methods for creating weight files
 for each simulation type

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 130 +++++++++++++++++-
 1 file changed, 128 insertions(+), 2 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 07b675f0f3..6ed07287ab 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -27,9 +27,17 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+import numpy as np
 from qonnx.core.datatype import DataType
-
+from qonnx.util.basic import (
+    interleave_matrix_outer_dim_from_partitions,
+    roundup_to_integer_multiple,
+)
+from finn.util.data_packing import (
+    pack_innermost_dim_as_hex_string,
+)
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+import warnings
 
 """@package thresholding_binary_search
 - ONNX i/o tensor shape assumptions for Thresholding:
@@ -172,6 +180,63 @@ def get_exp_cycles(self):
     def get_template_param_values(self):
         return dict()
 
+    def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
+        """Convert the original numpy threshold matrix orig_thres_matrix into
+        a form suitable for passing to the hlslib call:
+        * ensure NumChannels % PE == 0
+        * for unsigned inputs, ensure thresholds are non-negative
+        * interleave rows between PEs
+        * reshape into (1, PE, TMEM, n_thres_steps) and return
+        """
+        mh = self.get_nodeattr("NumChannels")
+        pe = self.get_nodeattr("PE")
+        tmem = mh // pe
+        assert mh % pe == 0, "Requirement NumChannels divisable by PE is violated."
+        assert (
+            orig_thres_matrix.ndim == 2
+        ), """Threshold matrix dimension is
+        not as expected (2)."""
+        n_thres_steps = orig_thres_matrix.shape[1]
+        assert n_thres_steps == self.get_nodeattr(
+            "numSteps"
+        ), "Mismatch in threshold steps"
+        if not self.get_input_datatype().signed():
+            # ensure all thresholds are nonnegative
+            assert (orig_thres_matrix >= 0).all()
+        # ensure all thresholds are integer
+        assert np.equal(
+            np.mod(orig_thres_matrix, 1), 0
+        ).all(), "Need int threshold tensor"
+        ret = orig_thres_matrix
+        # workaround for vivado_hls threshold bug
+        if ret[0][0] == 0 and n_thres_steps == 1:
+            ret = np.copy(ret)
+            ret[0][0] = 1
+            warnings.warn(
+                "Setting 0-valued first threshold to 1 to avoid vivado_hls bug"
+            )
+        # ensure channels = mh , duplicating if necessary
+        if ret.shape[0] == 1:
+            ret = np.tile(ret, (mh, 1))
+        assert (
+            ret.shape[0] == mh
+        ), "Channels of threshold matrix are not as expected (mh)"
+        # distribute rows between PEs
+        ret = interleave_matrix_outer_dim_from_partitions(ret, pe)
+        assert (
+            ret.shape[0] == pe
+        ), """First dimension after distribution of the
+        rows between PEs is not as expected (pe)"""
+        assert (
+            ret.shape[1] == tmem
+        ), """Second dimension after distribution of the
+        rows between PEs is not as expected (tmem)"""
+        assert (
+            ret.shape[2] == n_thres_steps
+        ), """Third dimension after distribution of the
+        rows between PEs is not as expected (n_thres_steps)"""
+        return ret.reshape(1, pe, tmem, n_thres_steps)
+
     def make_weight_file(self, weights, weight_file_mode, weight_file_name):
         """Produce a file containing given weights (thresholds) in appropriate
         format for this layer. This file can be used for either synthesis or
@@ -183,7 +248,68 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
           decoupled_runtime}
         * weight_file_name : filename for the weight file to be generated
         """
-        return
+        # There are 'decoupled_*' flavors, just make sure that the flavors are decoupled related
+        if "decoupled" not in weight_file_mode: raise Exception("Unrecognized memory mode for this node: {}".format(weight_file_mode))
+
+        threshold_tensor = self.get_hls_compatible_threshold_tensor(weights)
+        tdt = self.get_weight_datatype()
+        assert np.vectorize(tdt.allowed)(
+            threshold_tensor
+        ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
+
+        # streaming thresholds need to be organized differently
+        # (1, pe, tmem, n_thres_steps) -> (1, tmem, pe, n_thres_steps)
+        decoupled_thres = np.transpose(threshold_tensor, (0, 2, 1, 3))
+        # (1, tmem, pe, n_thres_steps) -> (1, tmem, pe * n_thres_steps)
+        pe = self.get_nodeattr("PE")
+        n_thres_steps = self.get_nodeattr("numSteps")
+        decoupled_thres_pe_flipped = np.flip(decoupled_thres, axis=-2)
+        decoupled_thres = decoupled_thres.reshape(1, -1, pe * n_thres_steps)
+        decoupled_thres = decoupled_thres.copy()
+        decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.reshape(
+            1, -1, pe * n_thres_steps
+        )
+        decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.copy()
+
+        if weight_file_mode == "decoupled_npy":
+            # save weight stream into npy for cppsim
+            np.save(weight_file_name, decoupled_thres)
+        elif weight_file_mode == "decoupled_verilog_dat":
+            # convert weight values into hexstring
+            weight_width = self.get_weightstream_width()
+            # pad to nearest 4 bits to get hex strings
+            weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
+            weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
+                decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix=""
+            )
+            weight_stream = weight_tensor_pe_flipped.flatten()
+            weight_stream = weight_stream.copy()
+            with open(weight_file_name, "w") as f:
+                for val in weight_stream:
+                    f.write(val + "\n")
+        elif weight_file_mode == "decoupled_runtime":
+            # memstream axi-lite interface will map each mem line to one or
+            # multiple 32-bit words; the word count is rounded up to a power of 2
+            weight_width = self.get_weightstream_width()
+            words_per_memwidth = 2 ** int(np.ceil(np.log2(weight_width / 32)))
+            if words_per_memwidth < 1:
+                words_per_memwidth = 1
+            weight_width_padded = words_per_memwidth * 32
+            # first, pack and ensure padding to 32 bits
+            weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
+                decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix=""
+            )
+            weight_stream = weight_tensor_pe_flipped.flatten()
+            weight_stream = weight_stream.copy()
+            with open(weight_file_name, "w") as f:
+                for val in weight_stream:
+                    # split into groups of 8 hex digits (= 32 bits); last group is written first
+                    words_32b = [val[i : i + 8] for i in range(0, len(val), 8)]
+                    words_32b.reverse()
+                    for word_32b in words_32b:
+                        f.write(word_32b + "\n")
+        else:
+            raise Exception("Decoupled weight export not yet implemented")
 
     # Get the integer from the DataType and string-ify it
     # This assumes that the data is in the form "INTx" or similar

From 560771a1b87a6f25dd2274232be55d86b350f74b Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 11:08:53 +0000
Subject: [PATCH 026/111] [thresholding] add method generate_params()

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 6ed07287ab..ff9f5f4875 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -408,7 +408,45 @@ def code_generation_ipgen(self, model, fpgapart, clk):
         self.set_nodeattr("ipgen_path", code_gen_dir)
         self.set_nodeattr("ip_path", code_gen_dir)
 
+        # Generate params for RTLSim
+        self.generate_params(model, code_gen_dir)
+
     def generate_params(self, model, path):
+        # Only 'decoupled' mode is supported
+        mem_mode = self.get_nodeattr("mem_mode")
+        if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode))
+
+        code_gen_dir = path
+        weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir)
+        thresholds = model.get_initializer(self.onnx_node.input[1])
+        self.make_weight_file(thresholds, "decoupled_npy", weight_filename_sim)
+
+        # Verilog.dat thresholds:
+        # also save weights as Verilog .dat file
+        # note that we provide two different .dat files, one for simulation
+        # and one for synthesis. this is because URAM-based weights always
+        # need zero weights for synthesis, otherwise they get inferred
+        # as BRAM
+        weight_filename_rtl_synth = "{}/memblock_synth_0.dat".format(code_gen_dir)
+        weight_filename_rtl_sim = "{}/memblock_sim_0.dat".format(code_gen_dir)
+        # sim weights are always the true weights
+        self.make_weight_file(
+            thresholds, "decoupled_verilog_dat", weight_filename_rtl_sim
+        )
+
+        # Synthesis thresholds:
+        ram_style = self.get_nodeattr("ram_style")
+        if ram_style == "ultra":
+            # UltraRAM must have no memory initializer, or only zeroes
+            # otherwise BRAM will be inferred instead of URAM
+            # as a workaround we provide a zero-weight init here
+            synth_thresholds = np.zeros_like(thresholds, dtype=np.float32)
+        else:
+            synth_thresholds = thresholds
+        self.make_weight_file(
+            synth_thresholds, "decoupled_verilog_dat", weight_filename_rtl_synth
+        )
+
         return
 
     def execute_node(self, context, graph):

From e763bf80773be4e362f9f9171a01bb4b9eb4dc8a Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 11:11:49 +0000
Subject: [PATCH 027/111] [thresholding] add method for preparing a Pyverilator
 object for RTL simulation

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 33 ++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index ff9f5f4875..611a75992e 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -28,6 +28,7 @@
 
 import os
 import numpy as np
+import warnings
 from qonnx.core.datatype import DataType
 from qonnx.util.basic import (
     interleave_matrix_outer_dim_from_partitions,
@@ -37,7 +38,12 @@
     pack_innermost_dim_as_hex_string,
 )
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-import warnings
+from finn.util.basic import make_build_dir, get_rtlsim_trace_depth
+
+try:
+    from pyverilator import PyVerilator
+except ModuleNotFoundError:
+    PyVerilator = None
 
 """@package thresholding_binary_search
 - ONNX i/o tensor shape assumptions for Thresholding:
@@ -449,6 +455,31 @@ def generate_params(self, model, path):
 
         return
 
+    def prepare_rtlsim(self):
+        """Creates a Verilator emulation library for the RTL code generated
+        for this node, sets the rtlsim_so attribute to its path and returns
+        a PyVerilator wrapper around it."""
+
+        if PyVerilator is None:
+            raise ImportError("Installation of PyVerilator is required.")
+
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        verilog_paths = [code_gen_dir]
+        verilog_files = self.get_rtl_file_list()
+
+        # build the Verilator emulation library
+        sim = PyVerilator.build(
+            verilog_files,
+            build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"),
+            verilog_path=verilog_paths,
+            trace_depth=get_rtlsim_trace_depth(),
+            top_module_name=self.get_nodeattr("gen_top_module"),
+        )
+
+        # save generated lib filename in attribute
+        self.set_nodeattr("rtlsim_so", sim.lib._name)
+        return sim
+
     def execute_node(self, context, graph):
         return
 

From 84e08f18a031dbfacec6a11b980c09885552efdf Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 11:14:47 +0000
Subject: [PATCH 028/111] [thresholding] add method to run rtlsim on a
 thresholding binary search simulation object

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 79 +++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 611a75992e..4c7c67af72 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -35,6 +35,8 @@
     roundup_to_integer_multiple,
 )
 from finn.util.data_packing import (
+    npy_to_rtlsim_input,
+    rtlsim_output_to_npy,
     pack_innermost_dim_as_hex_string,
 )
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -481,6 +483,83 @@ def prepare_rtlsim(self):
         return sim
 
     def execute_node(self, context, graph):
+        # Perform input checks
+        if self.get_nodeattr("exec_mode") != "rtlsim": raise Exception("Invalid exec_mode value: {}; exec_mode must be set to 'rtlsim'".format(self.get_nodeattr("exec_mode")))
+        if self.get_nodeattr("mem_mode") != "decoupled": raise Exception("Invalid mem_mode value: {}; mem_mode must be set to 'decoupled'".format(self.get_nodeattr("mem_mode")))
+
+        node = self.onnx_node
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        # create a npy file for each input of the node (in_ind is input index)
+        in_ind = 0
+        for inputs in node.input:
+            # only the first input (the data input) is exported to a npy file;
+            # inputs at index 1 and 2 are accepted but not written here, and
+            # any further input raises an exception
+            if in_ind == 0:
+                assert (
+                    str(context[inputs].dtype) == "float32"
+                ), """Input datatype is
+                not float32 as expected."""
+                expected_inp_shape = self.get_folded_input_shape()
+                reshaped_input = context[inputs].reshape(expected_inp_shape)
+
+                if self.get_input_datatype() == DataType["BIPOLAR"]:
+                    # store bipolar activations as binary
+                    reshaped_input = (reshaped_input + 1) / 2
+                    export_idt = DataType["BINARY"]
+                else:
+                    export_idt = self.get_input_datatype()
+
+                # make copy before saving the array
+                reshaped_input = reshaped_input.copy()
+                np.save(
+                    os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)),
+                    reshaped_input,
+                )
+            elif in_ind > 2:
+                raise Exception("Unexpected input found for Thresholding_Batch")
+            in_ind += 1
+
+        # Create a PyVerilator wrapper of the RTLSim .so
+        sim = self.get_rtlsim()
+        nbits = self.get_instream_width()
+        inp = npy_to_rtlsim_input(
+            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+        )
+
+        super().reset_rtlsim(sim)
+        super().toggle_clk(sim)
+
+        wnbits = self.get_weightstream_width()
+        export_wdt = self.get_weight_datatype()
+        wei = npy_to_rtlsim_input(
+            "{}/thresholds.npy".format(code_gen_dir), export_wdt, wnbits
+        )
+        num_w_reps = np.prod(self.get_nodeattr("numInputVectors"))
+        io_dict = {
+            "inputs": {"in0": inp, "weights": wei * num_w_reps},
+            "outputs": {"out": []},  # key must match the lookup below
+        }
+        self.rtlsim_multi_io(sim, io_dict)
+        output = io_dict["outputs"]["out"]
+
+        # Manage output data
+        odt = self.get_output_datatype()
+        target_bits = odt.bitwidth()
+        packed_bits = self.get_outstream_width()
+        out_npy_path = "{}/output.npy".format(code_gen_dir)
+        out_shape = self.get_folded_output_shape()
+
+        rtlsim_output_to_npy(
+            output, out_npy_path, odt, out_shape, packed_bits, target_bits
+        )
+
+        # load and reshape output
+        output = np.load(out_npy_path)
+        oshape = self.get_normal_output_shape()
+        output = np.asarray([output], dtype=np.float32).reshape(*oshape)
+        context[node.output[0]] = output
         return
 
     def code_generation_ipi(self):

From b0be07adb8e2bb0ab5005169ff0f878efc5c7c80 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 11:16:33 +0000
Subject: [PATCH 029/111] [thresholding] add stubbed method for
 ipgen_singlenode_code()

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../custom_op/fpgadataflow/thresholding_binary_search.py   | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 4c7c67af72..19140a0090 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -665,6 +665,13 @@ def get_dynamic_config(self, model, address_stride=1):
 
         return config
 
+    def ipgen_singlenode_code(self):
+        """Normally: Builds the bash script for IP generation.
+        This is needed for the HLSSynthIP() transformation.
+        This is an IP, not an HLS node, so therefore provide an empty hook
+        to prevent any HLS synthesis."""
+        pass
+
     def global_includes(self):
         pass
 

From 30d22f88a40864257a97f7e9e9ff84f25c1bc32e Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 16 Nov 2022 13:51:10 +0000
Subject: [PATCH 030/111] [thresholding] update class name to a more consistent
 naming convention

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/__init__.py                   | 4 ++--
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 65fbd6e20c..dc9a5a349a 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -59,7 +59,7 @@
 from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch
 from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch
 from finn.custom_op.fpgadataflow.thresholding_binary_search import (
-    Thresholding_Bin_Search,
+    Thresholding_Binary_Search,
 )
 from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker
 from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour_Batch
@@ -82,7 +82,7 @@
 custom_op["Pool_Batch"] = Pool_Batch
 custom_op["FMPadding_Batch"] = FMPadding_Batch
 custom_op["Thresholding_Batch"] = Thresholding_Batch
-custom_op["Thresholding_Binary_search"] = Thresholding_Bin_Search
+custom_op["Thresholding_Binary_Search"] = Thresholding_Binary_Search
 custom_op["AddStreams_Batch"] = AddStreams_Batch
 custom_op["LabelSelect_Batch"] = LabelSelect_Batch
 custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 19140a0090..9bf36283da 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -59,7 +59,7 @@
 """
 
 
-class Thresholding_Bin_Search(HLSCustomOp):
+class Thresholding_Binary_Search(HLSCustomOp):
     """Class that corresponds to finn-rtllib 'thresholding' function."""
 
     def __init__(self, onnx_node):

From 3594edddf51f8a13053a6ad99e179d081e15d8d4 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 17 Nov 2022 09:54:46 +0000
Subject: [PATCH 031/111] [thresholding] add fpgadataflow pytests for
 thresholding binary search node

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 ...fpgadataflow_thresholding_binary_search.py | 417 ++++++++++++++++++
 1 file changed, 417 insertions(+)
 create mode 100755 tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py

diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
new file mode 100755
index 0000000000..0a02503300
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -0,0 +1,417 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+import numpy as np
+from onnx import TensorProto, helper
+from pyverilator.util.axi_utils import axilite_write, reset_rtlsim
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.multithreshold import multithreshold
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
+
+from finn.core.rtlsim_exec import rtlsim_exec
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+
+test_fpga_part = "xczu3eg-sbva484-1-e"
+target_clk_ns = 5
+
+# Helper functions
+def sort_thresholds_increasing(thresholds):
+    return np.sort(thresholds, axis=1)
+
+def generate_random_threshold_values(input_data_type, num_input_channels, num_steps):
+    return np.random.randint(input_data_type.min(), input_data_type.max() + 1, (num_input_channels, num_steps)).astype(np.float32)
+
+def generate_pe_value(fold, num_input_channels):
+    if fold == -1:
+        fold = num_input_channels
+    pe = num_input_channels // fold
+    assert num_input_channels % pe == 0
+    return pe
+
+# n = batch, c = channel, h = height, w = width of feature map
+# Standard = NCHW; FINN = NHWC
+# Convert from NCHW to NHWC
+def convert_np_array_to_finn_data_layout(data):
+    return np.transpose(data, (0, 2, 3, 1))
+
+# n = batch, c = channel, h = height, w = width of feature map
+# Standard = NCHW; FINN = NHWC
+# Convert from NHWC to NCHW
+def convert_np_array_to_standard_data_layout(data):
+    return np.transpose(data, (0, 3, 1, 2))
+
+def make_single_thresholding_binary_search_modelwrapper(
+    thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+):
+    NumChannels = thresholds.shape[0]
+
+    inp = helper.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+
+    node_inp_list = ["inp", "thresh"]
+
+    Thresholding_node = helper.make_node(
+        "Thresholding_Binary_Search",
+        node_inp_list,
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        NumChannels=NumChannels,
+        PE=pe,
+        numSteps=thresholds.shape[1],
+        inputDataType=input_data_type.name,
+        weightDataType=input_data_type.name,
+        outputDataType=output_data_type.name,
+        activation_bias=activation_bias,
+        mem_mode=mem_mode,
+        numInputVectors=num_input_vecs,
+    )
+    graph = helper.make_graph(
+        nodes=[Thresholding_node],
+        name="thresholding_graph",
+        inputs=[inp],
+        outputs=[outp],
+    )
+
+    model = helper.make_model(graph, producer_name="thresholding-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", input_data_type)
+    model.set_tensor_datatype("outp", output_data_type)
+
+    model.set_tensor_datatype("thresh", input_data_type)
+    model.set_initializer("thresh", thresholds)
+    return model
+
+# Test brief: a particular method for this class was causing a bug - find_next_power_of_2()
+# Weights in the thresholding core are programmed on a per-channel basis and are byte-addressable.
+# When a channel is programmed, the next channel can start programming at the next power-of-2 byte boundary.
+# This test is to show that the function that calculates that boundary is working correctly.
+#
+# A Thresholding_Binary_Search layer was created and a SW generated dataset with a threshold channel
+# depth of 1 weight (1 layer of N channels in the thresholding core). However, find_next_power_of_2()
+# was returning a next-power-of-2 address boundary at address '0', instead of '2'. This unit test
+# is to prove that this bug no longer occurs. It was originally seen when the input datatype
+# was 'DataType["BIPOLAR"]'.
+@pytest.mark.tbs_unit
+@pytest.mark.tbs_all
+def test_fpgadataflow_thresholding_binary_search_unit():
+    activation = DataType["BIPOLAR"]
+    input_data_type = DataType["INT16"]
+    fold = -1
+    num_input_channels = 16
+    mem_mode = "decoupled"
+
+    # Handle inputs to the test
+    pe = generate_pe_value(fold, num_input_channels)
+    num_steps = activation.get_num_possible_values() - 1
+
+    # Other non-input parameters
+    num_input_vecs = [1, 2, 2]
+    output_data_type = activation
+    if output_data_type == DataType["BIPOLAR"]:
+        activation_bias = 0
+    else:
+        activation_bias = output_data_type.min()
+
+    # Generate random thresholds and sort in ascending order
+    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+
+    # Generate model from input parameters to the test
+    model = make_single_thresholding_binary_search_modelwrapper(
+        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+    )
+
+    # Retrieve the class to get the method-under-test
+    tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
+    tbs_inst = getCustomOp(tbs_node)
+
+    test_vector = [
+        {"input": -2, "expected_result": 0},
+        {"input": -1, "expected_result": 0},
+        {"input": 0, "expected_result": 0},
+        {"input": 1, "expected_result": 2},
+        {"input": 2, "expected_result": 2},
+        {"input": 3, "expected_result": 4},
+        {"input": 4, "expected_result": 4},
+        {"input": 7, "expected_result": 8},
+        {"input": 8, "expected_result": 8},
+        {"input": 11, "expected_result": 16},
+        {"input": 15, "expected_result": 16},
+        {"input": 16, "expected_result": 16},
+        {"input": 18, "expected_result": 32},
+        {"input": 27, "expected_result": 32},
+        {"input": 31, "expected_result": 32},
+        {"input": 32, "expected_result": 32},
+        {"input": 42, "expected_result": 64},
+        {"input": 65, "expected_result": 128},
+    ]
+
+    for test_dict in test_vector:
+        output = tbs_inst.find_next_power_of_2(test_dict["input"])
+        assert output >= test_dict["input"]
+        assert output == test_dict["expected_result"]
+
+    return
+
+# Test brief: Prove that cppsim is not supported for this class
+@pytest.mark.tbs_cppsim
+@pytest.mark.tbs_all
+def test_fpgadataflow_thresholding_binary_search_cppsim():
+    input_data_type = DataType["UINT16"]
+    act = DataType["BIPOLAR"]
+    fold = -1
+    num_input_channels = 16
+    mem_mode = "decoupled" # 'const' is unsupported - see test_fpgadataflow_thresholding_binary_search_const_mem_mode
+
+    pe = generate_pe_value(fold, num_input_channels)
+    num_steps = act.get_num_possible_values() - 1
+
+    # Generate random, non-decreasing thresholds
+    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+
+    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
+    # threshold of first channel is zero, while using BIPOLAR output)
+    if act == DataType["BIPOLAR"]:
+        thresholds[0][0] = 0
+    thresholds = sort_thresholds_increasing(thresholds)
+
+    # Other non-input parameters
+    num_input_vecs = [1, 2, 2]
+    output_data_type = act
+    if output_data_type == DataType["BIPOLAR"]:
+        activation_bias = 0
+    else:
+        activation_bias = output_data_type.min()
+
+    # Generate model from input parameters to the test
+    model = make_single_thresholding_binary_search_modelwrapper(
+        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+    )
+
+    # Cppsim is not supported for this class, catch the specific exception thrown by cppsim
+    # Exception raised in cppsim: Custom op_type Thresholding_Binary_Search is currently not supported.
+    try:
+        model = model.transform(PrepareCppSim())
+        model = model.transform(CompileCppSim())
+        model = model.transform(SetExecMode("cppsim"))
+    except Exception as e:
+        if str(e) != "Custom op_type Thresholding_Binary_Search is currently not supported.":
+            raise
+
+# Test brief: Prove that memory mode 'const' is not supported for this layer type
+@pytest.mark.tbs_const
+@pytest.mark.tbs_all
+def test_fpgadataflow_thresholding_binary_search_const_mem_mode():
+    input_data_type = DataType["INT16"]
+    activation = DataType["INT4"]
+    fold = -1
+    num_input_channels = 16
+    mem_mode = "const"
+
+    pe = generate_pe_value(fold, num_input_channels)
+    num_input_vecs = [1, 2, 2]
+    output_data_type = activation
+    activation_bias = output_data_type.min()
+
+    # Generate random thresholds and sort in ascending order
+    num_steps = activation.get_num_possible_values() - 1
+    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+
+    # Generate model from input parameters to the test
+    model = make_single_thresholding_binary_search_modelwrapper(
+        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+    )
+
+    # Prove that 'const' memory mode is not supported for this class
+    # 'const' memory mode is not supported for this class, catch the specific exception thrown by FINN
+    # Exception: ('Unrecognized memory mode for this node:', 'const')
+    try:
+        model = model.transform(InsertFIFO(True))
+        model = model.transform(GiveUniqueNodeNames())
+        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+        model = model.transform(HLSSynthIP())
+        model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+    except Exception as e:
+        if str(e) != "Unrecognized memory mode for this node: {}".format(mem_mode):
+            raise
+        # Caught the expected exception, leave the test early
+        return
+
+# Test brief: Test that PrepareRTLSim() runs successfully. This function is not
+# tested in test_fpgadataflow_thresholding_binary_search()
+@pytest.mark.tbs_prep_rtlsim
+@pytest.mark.tbs_all
+def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
+    input_data_type = DataType["INT16"]
+    act = DataType["INT4"]
+    fold = -1
+    num_input_channels = 16
+    mem_mode = "decoupled"
+
+    # Handle inputs to the test
+    pe = generate_pe_value(fold, num_input_channels)
+    num_steps = act.get_num_possible_values() - 1
+
+    # Generate random, non-decreasing thresholds
+    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
+    # threshold of first channel is zero, while using BIPOLAR output)
+    if act == DataType["BIPOLAR"]:
+        thresholds[0][0] = 0
+    thresholds = sort_thresholds_increasing(thresholds)
+
+    # Other non-input parameters
+    num_input_vecs = [1, 2, 2]
+    output_data_type = act
+    if output_data_type == DataType["BIPOLAR"]:
+        activation_bias = 0
+    else:
+        activation_bias = output_data_type.min()
+
+    # Generate model from input parameters to the test
+    model = make_single_thresholding_binary_search_modelwrapper(
+        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+    )
+
+    model = model.transform(SetExecMode("rtlsim"))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(PrepareRTLSim())
+    return
+
+# Test brief: Create a Thresholding binary search layer using various parameters
+# and test against a SW generated & simulated dataset
+# N.B. - fold factor of '-1' is supported only (no PE/SIMD support)
+@pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
+@pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
+@pytest.mark.parametrize("fold", [-1]) # 1, 2, etc. will fail
+@pytest.mark.parametrize("num_input_channels", [16])
+# no need to test 'const' mode, it's already done in test_fpgadataflow_thresholding_binary_search_const_mem_mode()
+@pytest.mark.parametrize("mem_mode", ["decoupled"])
+@pytest.mark.tbs_soak
+@pytest.mark.tbs_all
+def test_fpgadataflow_thresholding_binary_search(activation, input_data_type, fold, num_input_channels, mem_mode):
+    # Handle inputs to the test
+    pe = generate_pe_value(fold, num_input_channels)
+    num_steps = activation.get_num_possible_values() - 1
+
+    # Other non-input parameters
+    num_input_vecs = [1, 2, 2]
+    output_data_type = activation
+    if output_data_type == DataType["BIPOLAR"]:
+        activation_bias = 0
+    else:
+        activation_bias = output_data_type.min()
+
+    # generate random input data
+    tensor_shape = tuple(num_input_vecs + [num_input_channels])
+    x = gen_finn_dt_tensor(input_data_type, tensor_shape)
+
+    # Generate random thresholds and sort in ascending order
+    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+
+    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
+    # threshold of first channel is zero, while using BIPOLAR output)
+    if activation == DataType["BIPOLAR"]:
+        thresholds[0][0] = 0
+
+    # provide non-decreasing/ascending thresholds
+    thresholds = sort_thresholds_increasing(thresholds)
+
+    x_nhwc = convert_np_array_to_standard_data_layout(x)
+    y = multithreshold(x_nhwc, thresholds)
+
+    # convert back to NHWC for comparison to hw outputs
+    y = convert_np_array_to_finn_data_layout(y)
+    if activation == DataType["BIPOLAR"]:
+        # binary to bipolar
+        y = 2 * y - 1
+    else:
+        # signed offset
+        y += activation.min()
+
+    # Generate model from input parameters to the test
+    model = make_single_thresholding_binary_search_modelwrapper(
+        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+    )
+
+    model = model.transform(InsertFIFO(True))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+
+    # Retrieve the axilite programming sequence for the weights - for decoupled mode only
+    tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
+    tbs_inst = getCustomOp(tbs_node)
+    config = tbs_inst.get_dynamic_config(model)
+
+    # Reshape generated data (not from model)
+    oshape = model.get_tensor_shape("outp")
+    y_expected = y.reshape(oshape)
+
+    # Helper function that delivers the hook to program the thresholds via AXI-Lite
+    def config_hook(config):
+        if config is None:
+            return None
+
+        def write_thresh_config(sim):
+            # axi_name = "s_axilite_0_" # works
+            axi_name = getCustomOp(model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]).get_verilog_top_module_intf_names()['axilite'][0]
+            axi_name += "_0_"
+
+            # 1. Write config registers to the Threshold memory, dict defines (addr, value) tuples
+            for config_entry in config.values():
+                addr = config_entry[0]
+                val = config_entry[1]
+                axilite_write(sim, addr, val, basename=axi_name)
+
+            reset_rtlsim(sim)
+        return write_thresh_config
+
+    input_dict = {"inp": x}
+    rtlsim_exec(model, input_dict, pre_hook=config_hook(config))
+    y_produced = input_dict["outp"]
+    assert (y_produced == y_expected).all()

From 0bee70d5e4bc5fd163b8cf8a84931ac709aaac35 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 17 Nov 2022 10:08:38 +0000
Subject: [PATCH 032/111] [thresholding] add linter fixes

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 121 ++++++++++++------
 ...fpgadataflow_thresholding_binary_search.py | 103 ++++++++++++---
 2 files changed, 168 insertions(+), 56 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 9bf36283da..b785abcaa8 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -26,21 +26,22 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
 import numpy as np
+import os
 import warnings
 from qonnx.core.datatype import DataType
 from qonnx.util.basic import (
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
 )
+
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+from finn.util.basic import get_rtlsim_trace_depth, make_build_dir
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
-    rtlsim_output_to_npy,
     pack_innermost_dim_as_hex_string,
+    rtlsim_output_to_npy,
 )
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.util.basic import make_build_dir, get_rtlsim_trace_depth
 
 try:
     from pyverilator import PyVerilator
@@ -151,7 +152,10 @@ def get_outstream_width(self):
     def get_weightstream_width(self):
         # Only 'decoupled' mode is supported
         mem_mode = self.get_nodeattr("mem_mode")
-        if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode))
+        if mem_mode != "decoupled":
+            raise Exception(
+                "Unrecognized memory mode for this node: {}".format(mem_mode)
+            )
         pe = self.get_nodeattr("PE")
         wp = self.get_weight_datatype().bitwidth()
         n_thres_steps = self.get_nodeattr("numSteps")
@@ -257,7 +261,10 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
         * weight_file_name : filename for the weight file to be generated
         """
         # There are 'decoupled_*' flavors, just make sure that the flavors are decoupled related
-        if "decoupled" not in weight_file_mode: raise Exception("Unrecognized memory mode for this node: {}".format(weight_file_mode))
+        if "decoupled" not in weight_file_mode:
+            raise Exception(
+                "Unrecognized memory mode for this node: {}".format(weight_file_mode)
+            )
 
         threshold_tensor = self.get_hls_compatible_threshold_tensor(weights)
         tdt = self.get_weight_datatype()
@@ -334,21 +341,35 @@ def prepare_codegen_rtl_values(self):
 
         # Identify the module names
         code_gen_dict["$MODULE_NAME$"] = [self.get_verilog_top_module_name()]
-        code_gen_dict["$MODULE_NAME_AXI$"] = [self.get_verilog_top_module_name() + "_axi"]
-        code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [self.get_verilog_top_module_name() + "_axi_wrapper"]
+        code_gen_dict["$MODULE_NAME_AXI$"] = [
+            self.get_verilog_top_module_name() + "_axi"
+        ]
+        code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [
+            self.get_verilog_top_module_name() + "_axi_wrapper"
+        ]
         # Set the top module name - AXI wrapper
         code_gen_dict["$TOP_MODULE$"] = code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"]
 
         # Identify the module variables
-        output_data_type = self.get_nodeattr("outputDataType") # output precision
-        input_data_type = self.get_nodeattr("inputDataType") # input/threshold precision
-        num_channels = self.get_nodeattr("NumChannels") # number of channels
-        bias = self.get_nodeattr("activation_bias") # activation bias value
-
-        code_gen_dict["$N$"] = [self.conv_datatype_to_str(output_data_type)] # output precision
-        code_gen_dict["$M$"] = [self.conv_datatype_to_str(input_data_type)] # input/threshold precision
-        code_gen_dict["$C$"] = [self.conv_datatype_to_str(num_channels)] # number of channels
-        code_gen_dict["$BIAS$"] = [self.conv_datatype_to_str(bias)] # activation bias value
+        output_data_type = self.get_nodeattr("outputDataType")  # output precision
+        input_data_type = self.get_nodeattr(
+            "inputDataType"
+        )  # input/threshold precision
+        num_channels = self.get_nodeattr("NumChannels")  # number of channels
+        bias = self.get_nodeattr("activation_bias")  # activation bias value
+
+        code_gen_dict["$N$"] = [
+            self.conv_datatype_to_str(output_data_type)
+        ]  # output precision
+        code_gen_dict["$M$"] = [
+            self.conv_datatype_to_str(input_data_type)
+        ]  # input/threshold precision
+        code_gen_dict["$C$"] = [
+            self.conv_datatype_to_str(num_channels)
+        ]  # number of channels
+        code_gen_dict["$BIAS$"] = [
+            self.conv_datatype_to_str(bias)
+        ]  # activation bias value
 
         # Is the input datatype signed or unsigned? The thresholding core needs to know this
         if self.get_input_datatype().min() < 0:
@@ -359,9 +380,7 @@ def prepare_codegen_rtl_values(self):
         return code_gen_dict
 
     def get_rtl_file_list(self):
-        return ["thresholding.sv",
-                "thresholding_axi.sv",
-                "thresholding_axi_wrapper.v"]
+        return ["thresholding.sv", "thresholding_axi.sv", "thresholding_axi_wrapper.v"]
 
     def get_rtl_file_paths(self):
         rtl_root_dir = os.environ["FINN_ROOT"] + "/finn-rtllib/thresholding/hdl/"
@@ -399,7 +418,7 @@ def generate_hdl(self):
             # apply code generation to templates
             data = self.fill_in_rtl_template_data(code_gen_dict, template_data)
             # dump filled-in template to destination directory for compilation
-            file_only_path = rtl_file_path.split('/')[-1]
+            file_only_path = rtl_file_path.split("/")[-1]
             self.dump_rtl_data(code_gen_dir, file_only_path, data)
 
         # Before we return - set the 'gen_top_module' attribute for use later by PyVerilator and IPI generation
@@ -422,7 +441,10 @@ def code_generation_ipgen(self, model, fpgapart, clk):
     def generate_params(self, model, path):
         # Only 'decoupled' mode is supported
         mem_mode = self.get_nodeattr("mem_mode")
-        if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode))
+        if mem_mode != "decoupled":
+            raise Exception(
+                "Unrecognized memory mode for this node: {}".format(mem_mode)
+            )
 
         code_gen_dir = path
         weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir)
@@ -484,8 +506,18 @@ def prepare_rtlsim(self):
 
     def execute_node(self, context, graph):
         # Perform input checks
-        if self.get_nodeattr("exec_mode") != "rtlsim": raise Exception("Invalid exec_mode value: {}; exec_mode must be set to 'rtlsim'".format(self.get_nodeattr("exec_mode")))
-        if self.get_nodeattr("mem_mode") != "decoupled": raise Exception("Invalid mem_mode value: {}; mem_mode must be set to 'decoupled'".format(self.get_nodeattr("mem_mode")))
+        if self.get_nodeattr("exec_mode") != "rtlsim":
+            raise Exception(
+                "Invalid exec_mode value: {}; exec_mode must be set to 'rtlsim'".format(
+                    self.get_nodeattr("exec_mode")
+                )
+            )
+        if self.get_nodeattr("mem_mode") != "decoupled":
+            raise Exception(
+                "Invalid mem_mode value: {}; mem_mode must be set to 'decoupled'".format(
+                    self.get_nodeattr("mem_mode")
+                )
+            )
 
         node = self.onnx_node
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
@@ -569,24 +601,27 @@ def code_generation_ipi(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
 
         for rtl_file in rtl_file_list:
-            cmd.append("add_files -norecurse %s"
-            % (
-                os.path.join(
-                    code_gen_dir, rtl_file
-                )
-            ))
+            cmd.append(
+                "add_files -norecurse %s" % (os.path.join(code_gen_dir, rtl_file))
+            )
 
         # Create an RTL block, not an IP core (-type ip)
-        cmd.append("create_bd_cell -type module -reference %s %s"
-            % (self.get_nodeattr("gen_top_module"), self.onnx_node.name))
+        cmd.append(
+            "create_bd_cell -type module -reference %s %s"
+            % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)
+        )
 
         # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between
         # /Thresholding_Binary_Search_0/s_axis(100000000 and /StreamingFIFO_0/out_V(200000000.000000)
-        cmd.append("set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/s_axis]")
+        cmd.append(
+            "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/s_axis]"
+        )
 
         # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between
         # /StreamingFIFO_1/in0_V(200000000.000000) and /Thresholding_Binary_Search_0/m_axis(100000000)
-        cmd.append("set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/m_axis]")
+        cmd.append(
+            "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/m_axis]"
+        )
 
         return cmd
 
@@ -603,7 +638,10 @@ def get_verilog_top_module_intf_names(self):
         intf_names = super().get_verilog_top_module_intf_names()
         # Only 'decoupled' mode is supported - check before adding axilite interface
         mem_mode = self.get_nodeattr("mem_mode")
-        if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode))
+        if mem_mode != "decoupled":
+            raise Exception(
+                "Unrecognized memory mode for this node: {}".format(mem_mode)
+            )
         intf_names["axilite"] = ["s_axilite"]
         intf_names["s_axis"] = [["s_axis"]]
         intf_names["m_axis"] = [["m_axis"]]
@@ -618,7 +656,7 @@ def find_next_power_of_2(self, n):
             return 0
         # If '1' is requested, output will be '0' in the loop below, so avoid this earlier.
         elif n == 1:
-            return 2 # i.e. 2**1
+            return 2  # i.e. 2**1
 
         # decrement 'n' (to handle cases when `n` itself is a power of 2)
         n = n - 1
@@ -651,12 +689,17 @@ def get_dynamic_config(self, model, address_stride=1):
         config = {}
         channel_cntr = 0
         for channel in thresholds:
-            channel_start_addr = (channel_cntr * weight_addr_boundary * address_stride)
+            channel_start_addr = channel_cntr * weight_addr_boundary * address_stride
             weight_cntr = 0
             addr = 0
             for weight in channel:
-                key_name = "{}_{}{}_{}{}".format("axilite", "ch", str(channel_cntr), "w", str(weight_cntr))
-                config[key_name] = (channel_start_addr + addr, self.prep_axilite_val(weight))
+                key_name = "{}_{}{}_{}{}".format(
+                    "axilite", "ch", str(channel_cntr), "w", str(weight_cntr)
+                )
+                config[key_name] = (
+                    channel_start_addr + addr,
+                    self.prep_axilite_val(weight),
+                )
 
                 weight_cntr += 1
                 addr += address_stride
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 0a02503300..579b6fe83c 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -27,6 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
+
 import numpy as np
 from onnx import TensorProto, helper
 from pyverilator.util.axi_utils import axilite_write, reset_rtlsim
@@ -54,8 +55,14 @@
 def sort_thresholds_increasing(thresholds):
     return np.sort(thresholds, axis=1)
 
+
 def generate_random_threshold_values(input_data_type, num_input_channels, num_steps):
-    return np.random.randint(input_data_type.min(), input_data_type.max() + 1, (num_input_channels, num_steps)).astype(np.float32)
+    return np.random.randint(
+        input_data_type.min(),
+        input_data_type.max() + 1,
+        (num_input_channels, num_steps),
+    ).astype(np.float32)
+
 
 def generate_pe_value(fold, num_input_channels):
     if fold == -1:
@@ -64,20 +71,29 @@ def generate_pe_value(fold, num_input_channels):
     assert num_input_channels % pe == 0
     return pe
 
+
 # n = batch, c = channel, h = height, w = width of feature map
 # Standard = NCHW; FINN = NHWC
 # Convert from NCHW to NHWC
 def convert_np_array_to_finn_data_layout(data):
     return np.transpose(data, (0, 2, 3, 1))
 
+
 # n = batch, c = channel, h = height, w = width of feature map
 # Standard = NCHW; FINN = NHWC
 # Convert from NHWC to NCHW
 def convert_np_array_to_standard_data_layout(data):
     return np.transpose(data, (0, 3, 1, 2))
 
+
 def make_single_thresholding_binary_search_modelwrapper(
-    thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+    thresholds,
+    pe,
+    input_data_type,
+    output_data_type,
+    activation_bias,
+    mem_mode,
+    num_input_vecs,
 ):
     NumChannels = thresholds.shape[0]
 
@@ -123,6 +139,7 @@ def make_single_thresholding_binary_search_modelwrapper(
     model.set_initializer("thresh", thresholds)
     return model
 
+
 # Test brief: a particular method for this class was causing a bug - find_next_power_of_2()
 # Weights in the thresholding core are programmed on a per-channel basis and are byte-addressable.
 # When a channel is programmed, the next channel can start programming at the next power-of-2 byte boundary.
@@ -155,11 +172,19 @@ def test_fpgadataflow_thresholding_binary_search_unit():
         activation_bias = output_data_type.min()
 
     # Generate random thresholds and sort in ascending order
-    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+    thresholds = generate_random_threshold_values(
+        input_data_type, num_input_channels, num_steps
+    )
 
     # Generate model from input parameters to the test
     model = make_single_thresholding_binary_search_modelwrapper(
-        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        mem_mode,
+        num_input_vecs,
     )
 
     # Retrieve the class to get the method-under-test
@@ -194,6 +219,7 @@ def test_fpgadataflow_thresholding_binary_search_unit():
 
     return
 
+
 # Test brief: Prove that cppsim is not supported for this class
 @pytest.mark.tbs_cppsim
 @pytest.mark.tbs_all
@@ -202,13 +228,15 @@ def test_fpgadataflow_thresholding_binary_search_cppsim():
     act = DataType["BIPOLAR"]
     fold = -1
     num_input_channels = 16
-    mem_mode = "decoupled" # 'const' is unsupported - see test_fpgadataflow_thresholding_binary_search_const_mem_mode
+    mem_mode = "decoupled"  # 'const' is unsupported - see test_fpgadataflow_thresholding_binary_search_const_mem_mode
 
     pe = generate_pe_value(fold, num_input_channels)
     num_steps = act.get_num_possible_values() - 1
 
     # Generate random, non-decreasing thresholds
-    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+    thresholds = generate_random_threshold_values(
+        input_data_type, num_input_channels, num_steps
+    )
 
     # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
     # threshold of first channel is zero, while using BIPOLAR output)
@@ -226,7 +254,13 @@ def test_fpgadataflow_thresholding_binary_search_cppsim():
 
     # Generate model from input parameters to the test
     model = make_single_thresholding_binary_search_modelwrapper(
-        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        mem_mode,
+        num_input_vecs,
     )
 
     # Cppsim is not supported for this class, catch the specific exception thrown by cppsim
@@ -236,9 +270,13 @@ def test_fpgadataflow_thresholding_binary_search_cppsim():
         model = model.transform(CompileCppSim())
         model = model.transform(SetExecMode("cppsim"))
     except Exception as e:
-        if str(e) != "Custom op_type Thresholding_Binary_Search is currently not supported.":
+        if (
+            str(e)
+            != "Custom op_type Thresholding_Binary_Search is currently not supported."
+        ):
             raise
 
+
 # Test brief: Prove that memory mode 'const' is not supported for this layer type
 @pytest.mark.tbs_const
 @pytest.mark.tbs_all
@@ -256,11 +294,19 @@ def test_fpgadataflow_thresholding_binary_search_const_mem_mode():
 
     # Generate random thresholds and sort in ascending order
     num_steps = activation.get_num_possible_values() - 1
-    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+    thresholds = generate_random_threshold_values(
+        input_data_type, num_input_channels, num_steps
+    )
 
     # Generate model from input parameters to the test
     model = make_single_thresholding_binary_search_modelwrapper(
-        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        mem_mode,
+        num_input_vecs,
     )
 
     # Prove that 'const' memory mode is not supported for this class
@@ -278,6 +324,7 @@ def test_fpgadataflow_thresholding_binary_search_const_mem_mode():
         # Caught the expected exception, leave the test early
         return
 
+
 # Test brief: Test that PrepareRTLSim() runs successfully. This function is not
 # tested in test_fpgadataflow_thresholding_binary_search()
 @pytest.mark.tbs_prep_rtlsim
@@ -294,7 +341,9 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
     num_steps = act.get_num_possible_values() - 1
 
     # Generate random, non-decreasing thresholds
-    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+    thresholds = generate_random_threshold_values(
+        input_data_type, num_input_channels, num_steps
+    )
     # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
     # threshold of first channel is zero, while using BIPOLAR output)
     if act == DataType["BIPOLAR"]:
@@ -311,7 +360,13 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
 
     # Generate model from input parameters to the test
     model = make_single_thresholding_binary_search_modelwrapper(
-        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        mem_mode,
+        num_input_vecs,
     )
 
     model = model.transform(SetExecMode("rtlsim"))
@@ -321,18 +376,21 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
     model = model.transform(PrepareRTLSim())
     return
 
+
 # Test brief: Create a Thresholding binary search layer using various parameters
 # and test against a SW generated & simulated dataset
 # N.B. - fold factor of '-1' is supported only (no PE/SIMD support)
 @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
-@pytest.mark.parametrize("fold", [-1]) # 1, 2, etc. will fail
+@pytest.mark.parametrize("fold", [-1])  # 1, 2, etc. will fail
 @pytest.mark.parametrize("num_input_channels", [16])
 # no need to test 'const' mode, it's already done in test_fpgadataflow_thresholding_binary_search_const_mem_mode()
 @pytest.mark.parametrize("mem_mode", ["decoupled"])
 @pytest.mark.tbs_soak
 @pytest.mark.tbs_all
-def test_fpgadataflow_thresholding_binary_search(activation, input_data_type, fold, num_input_channels, mem_mode):
+def test_fpgadataflow_thresholding_binary_search(
+    activation, input_data_type, fold, num_input_channels, mem_mode
+):
     # Handle inputs to the test
     pe = generate_pe_value(fold, num_input_channels)
     num_steps = activation.get_num_possible_values() - 1
@@ -350,7 +408,9 @@ def test_fpgadataflow_thresholding_binary_search(activation, input_data_type, fo
     x = gen_finn_dt_tensor(input_data_type, tensor_shape)
 
     # Generate random thresholds and sort in ascending order
-    thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps)
+    thresholds = generate_random_threshold_values(
+        input_data_type, num_input_channels, num_steps
+    )
 
     # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
     # threshold of first channel is zero, while using BIPOLAR output)
@@ -374,7 +434,13 @@ def test_fpgadataflow_thresholding_binary_search(activation, input_data_type, fo
 
     # Generate model from input parameters to the test
     model = make_single_thresholding_binary_search_modelwrapper(
-        thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        mem_mode,
+        num_input_vecs,
     )
 
     model = model.transform(InsertFIFO(True))
@@ -399,7 +465,9 @@ def config_hook(config):
 
         def write_thresh_config(sim):
             # axi_name = "s_axilite_0_" # works
-            axi_name = getCustomOp(model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]).get_verilog_top_module_intf_names()['axilite'][0]
+            axi_name = getCustomOp(
+                model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
+            ).get_verilog_top_module_intf_names()["axilite"][0]
             axi_name += "_0_"
 
             # 1. Write config registers to the Threshold memory, dict defines (addr, value) tuples
@@ -409,6 +477,7 @@ def write_thresh_config(sim):
                 axilite_write(sim, addr, val, basename=axi_name)
 
             reset_rtlsim(sim)
+
         return write_thresh_config
 
     input_dict = {"inp": x}

From 0689c6a6a03cbc2e9b3982af971144ac186a2c76 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 17 Nov 2022 10:30:50 +0000
Subject: [PATCH 033/111] [thresholding] add flake8 fixes

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 37 +++++++++-------
 ...fpgadataflow_thresholding_binary_search.py | 42 +++++++++++--------
 2 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index b785abcaa8..003dbb2fd9 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -28,7 +28,9 @@
 
 import numpy as np
 import os
+import textwrap
 import warnings
+from math import ceil, log2
 from qonnx.core.datatype import DataType
 from qonnx.util.basic import (
     interleave_matrix_outer_dim_from_partitions,
@@ -260,7 +262,8 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
           decoupled_runtime}
         * weight_file_name : filename for the weight file to be generated
         """
-        # There are 'decoupled_*' flavors, just make sure that the flavors are decoupled related
+        # There are 'decoupled_*' flavors, just make sure that the flavors
+        # are decoupled related
         if "decoupled" not in weight_file_mode:
             raise Exception(
                 "Unrecognized memory mode for this node: {}".format(weight_file_mode)
@@ -371,7 +374,8 @@ def prepare_codegen_rtl_values(self):
             self.conv_datatype_to_str(bias)
         ]  # activation bias value
 
-        # Is the input datatype signed or unsigned? The thresholding core needs to know this
+        # Is the input datatype signed or unsigned?
+        # The thresholding core needs to know this when comparing weights to inputs
         if self.get_input_datatype().min() < 0:
             code_gen_dict["$SIGN$"] = ["signed"]
         else:
@@ -421,7 +425,8 @@ def generate_hdl(self):
             file_only_path = rtl_file_path.split("/")[-1]
             self.dump_rtl_data(code_gen_dir, file_only_path, data)
 
-        # Before we return - set the 'gen_top_module' attribute for use later by PyVerilator and IPI generation
+        # Before we return - set the 'gen_top_module' attribute for use later
+        # by PyVerilator and IPI generation
         self.set_nodeattr("gen_top_module", code_gen_dict["$TOP_MODULE$"][0])
         return
 
@@ -508,14 +513,14 @@ def execute_node(self, context, graph):
         # Perform input checks
         if self.get_nodeattr("exec_mode") != "rtlsim":
             raise Exception(
-                "Invalid exec_mode value: {}; exec_mode must be set to 'rtlsim'".format(
-                    self.get_nodeattr("exec_mode")
+                "Invalid exec_mode value: {}; exec_mode must be set to '{}'".format(
+                    self.get_nodeattr("exec_mode"), "rtlsim"
                 )
             )
         if self.get_nodeattr("mem_mode") != "decoupled":
             raise Exception(
-                "Invalid mem_mode value: {}; mem_mode must be set to 'decoupled'".format(
-                    self.get_nodeattr("mem_mode")
+                "Invalid mem_mode value: {}; mem_mode must be set to '{}'".format(
+                    self.get_nodeattr("mem_mode"), "decoupled"
                 )
             )
 
@@ -595,7 +600,8 @@ def execute_node(self, context, graph):
         return
 
     def code_generation_ipi(self):
-        """Constructs and returns the TCL commands for node instantiation as an RTL block."""
+        """Constructs and returns the TCL commands for node instantiation as an RTL
+        block."""
         cmd = []
         rtl_file_list = self.get_rtl_file_list()
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
@@ -612,15 +618,19 @@ def code_generation_ipi(self):
         )
 
         # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between
-        # /Thresholding_Binary_Search_0/s_axis(100000000 and /StreamingFIFO_0/out_V(200000000.000000)
+        # /Thresholding_Binary_Search_0/s_axis(100000000 and
+        # /StreamingFIFO_0/out_V(200000000.000000)
         cmd.append(
-            "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/s_axis]"
+            "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [%s %s]"
+            % ("get_bd_intf_pins", "Thresholding_Binary_Search_0/s_axis")
         )
 
         # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between
-        # /StreamingFIFO_1/in0_V(200000000.000000) and /Thresholding_Binary_Search_0/m_axis(100000000)
+        # /StreamingFIFO_1/in0_V(200000000.000000) and
+        # /Thresholding_Binary_Search_0/m_axis(100000000)
         cmd.append(
-            "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/m_axis]"
+            "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [%s %s]"
+            % ("get_bd_intf_pins", "Thresholding_Binary_Search_0/m_axis")
         )
 
         return cmd
@@ -654,7 +664,7 @@ def find_next_power_of_2(self, n):
         # Negative values will loop infinitely below - return 0
         if n <= 0:
             return 0
-        # If '1' is requested, output will be '0' in the loop below, so avoid this earlier.
+        # If '1' is requested, output will be '0' in the loop below, avoid this now.
         elif n == 1:
             return 2  # i.e. 2**1
 
@@ -674,7 +684,6 @@ def prep_axilite_val(self, val):
         return self.twos_comp(int(val), self.get_weight_datatype().bitwidth())
 
     def get_dynamic_config(self, model, address_stride=1):
-        ## TODO - not sure this description is correct
         """Returns a configuration dictionary containing axilite write commands
         in order to program the thresholds into the RTL core during runtime.
         The default address stride for the weights is 1 byte."""
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 579b6fe83c..81a089844d 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -51,6 +51,7 @@
 test_fpga_part = "xczu3eg-sbva484-1-e"
 target_clk_ns = 5
 
+
 # Helper functions
 def sort_thresholds_increasing(thresholds):
     return np.sort(thresholds, axis=1)
@@ -140,16 +141,18 @@ def make_single_thresholding_binary_search_modelwrapper(
     return model
 
 
-# Test brief: a particular method for this class was causing a bug - find_next_power_of_2()
-# Weights in the thresholding core are programmed on a per-channel basis and are byte-addressable.
-# When a channel is programmed, the next channel can start programming at the next power-of-2 byte boundary.
-# This test is to show that the function that calculates that boundary is working correctly.
+# Test brief: a particular method for this class was causing a bug:
+# find_next_power_of_2()
+# Weights in the thresholding core are programmed on a per-channel basis and are
+# byte-addressable. When a channel is programmed, the next channel can start
+# programming at the next power-of-2 byte boundary. This test is to show that the
+# function that calculates that boundary is working correctly.
 #
-# A Thresholding_Binary_Search layer was created and a SW generated dataset with a threshold channel
-# depth of 1 weight (1 layer of N channels in the thresholding core). However, find_next_power_of_2()
-# was returning a next-power-of-2 address boundary at address '0', instead of '2'. This unit test
-# is to prove that this bug no longer occurs. It was originally seen when the input datatype
-# was 'DataType["BIPOLAR"]'.
+# A Thresholding_Binary_Search layer was created and a SW generated dataset with a
+# threshold channel depth of 1 weight (1 layer of N channels in the thresholding core).
+# However, find_next_power_of_2() was returning a next-power-of-2 address boundary at
+# address '0', instead of '2'. This unit test is to prove that this bug no longer
+# occurs. It was originally seen when the input datatype was 'DataType["BIPOLAR"]'.
 @pytest.mark.tbs_unit
 @pytest.mark.tbs_all
 def test_fpgadataflow_thresholding_binary_search_unit():
@@ -228,7 +231,9 @@ def test_fpgadataflow_thresholding_binary_search_cppsim():
     act = DataType["BIPOLAR"]
     fold = -1
     num_input_channels = 16
-    mem_mode = "decoupled"  # 'const' is unsupported - see test_fpgadataflow_thresholding_binary_search_const_mem_mode
+    # 'const' is unsupported see test:
+    # test_fpgadataflow_thresholding_binary_search_const_mem_mode()
+    mem_mode = "decoupled"
 
     pe = generate_pe_value(fold, num_input_channels)
     num_steps = act.get_num_possible_values() - 1
@@ -263,8 +268,9 @@ def test_fpgadataflow_thresholding_binary_search_cppsim():
         num_input_vecs,
     )
 
-    # Cppsim is not supported for this class, catch the specific exception thrown by cppsim
-    # Exception raised in cppsim: Custom op_type Thresholding_Binary_Search is currently not supported.
+    # Cppsim is not supported for this class, catch the specific exception thrown by
+    # cppsim. Exception raised in cppsim: Custom op_type Thresholding_Binary_Search is
+    # currently not supported.
     try:
         model = model.transform(PrepareCppSim())
         model = model.transform(CompileCppSim())
@@ -310,8 +316,8 @@ def test_fpgadataflow_thresholding_binary_search_const_mem_mode():
     )
 
     # Prove that 'const' memory mode is not supported for this class
-    # 'const' memory mode is not supported for this class, catch the specific exception thrown by FINN
-    # Exception: ('Unrecognized memory mode for this node:', 'const')
+    # 'const' memory mode is not supported for this class, catch the specific exception
+    # thrown by FINN. Exception: ('Unrecognized memory mode for this node:', 'const')
     try:
         model = model.transform(InsertFIFO(True))
         model = model.transform(GiveUniqueNodeNames())
@@ -384,7 +390,8 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
 @pytest.mark.parametrize("fold", [-1])  # 1, 2, etc. will fail
 @pytest.mark.parametrize("num_input_channels", [16])
-# no need to test 'const' mode, it's already done in test_fpgadataflow_thresholding_binary_search_const_mem_mode()
+# no need to test 'const' mode, it's already done in:
+# test_fpgadataflow_thresholding_binary_search_const_mem_mode()
 @pytest.mark.parametrize("mem_mode", ["decoupled"])
 @pytest.mark.tbs_soak
 @pytest.mark.tbs_all
@@ -449,7 +456,7 @@ def test_fpgadataflow_thresholding_binary_search(
     model = model.transform(HLSSynthIP())
     model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
 
-    # Retrieve the axilite programming sequence for the weights - for decoupled mode only
+    # Retrieve the axilite programming sequence for weights - for decoupled mode only
     tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
     tbs_inst = getCustomOp(tbs_node)
     config = tbs_inst.get_dynamic_config(model)
@@ -470,7 +477,8 @@ def write_thresh_config(sim):
             ).get_verilog_top_module_intf_names()["axilite"][0]
             axi_name += "_0_"
 
-            # 1. Write config registers to the Threshold memory, dict defines (addr, value) tuples
+            # Write config registers to the Threshold memory.
+            # The dictionary defines (addr, value) tuples.
             for config_entry in config.values():
                 addr = config_entry[0]
                 val = config_entry[1]

From e9a4a7bb9dbdcc6dd2a7dd900f62851891793017 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 17 Nov 2022 12:01:52 +0000
Subject: [PATCH 034/111] [thresholding] change the pytest markers to omit
 tests from quicktest

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 ...fpgadataflow_thresholding_binary_search.py | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 81a089844d..e2189c4c79 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -153,8 +153,8 @@ def make_single_thresholding_binary_search_modelwrapper(
 # However, find_next_power_of_2() was returning a next-power-of-2 address boundary at
 # address '0', instead of '2'. This unit test is to prove that this bug no longer
 # occurs. It was originally seen when the input datatype was 'DataType["BIPOLAR"]'.
-@pytest.mark.tbs_unit
-@pytest.mark.tbs_all
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
 def test_fpgadataflow_thresholding_binary_search_unit():
     activation = DataType["BIPOLAR"]
     input_data_type = DataType["INT16"]
@@ -224,8 +224,8 @@ def test_fpgadataflow_thresholding_binary_search_unit():
 
 
 # Test brief: Prove that cppsim is not supported for this class
-@pytest.mark.tbs_cppsim
-@pytest.mark.tbs_all
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
 def test_fpgadataflow_thresholding_binary_search_cppsim():
     input_data_type = DataType["UINT16"]
     act = DataType["BIPOLAR"]
@@ -284,8 +284,8 @@ def test_fpgadataflow_thresholding_binary_search_cppsim():
 
 
 # Test brief: Prove that memory mode 'const' is not supported for this layer type
-@pytest.mark.tbs_const
-@pytest.mark.tbs_all
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
 def test_fpgadataflow_thresholding_binary_search_const_mem_mode():
     input_data_type = DataType["INT16"]
     activation = DataType["INT4"]
@@ -333,8 +333,8 @@ def test_fpgadataflow_thresholding_binary_search_const_mem_mode():
 
 # Test brief: Test that PrepareRTLSim() runs successfully. This function is not
 # tested in test_fpgadataflow_thresholding_binary_search()
-@pytest.mark.tbs_prep_rtlsim
-@pytest.mark.tbs_all
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
 def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
     input_data_type = DataType["INT16"]
     act = DataType["INT4"]
@@ -393,8 +393,9 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
 # no need to test 'const' mode, it's already done in:
 # test_fpgadataflow_thresholding_binary_search_const_mem_mode()
 @pytest.mark.parametrize("mem_mode", ["decoupled"])
-@pytest.mark.tbs_soak
-@pytest.mark.tbs_all
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
+@pytest.mark.slow
 def test_fpgadataflow_thresholding_binary_search(
     activation, input_data_type, fold, num_input_channels, mem_mode
 ):

From 41c0b4b0799674cd468b9aabfe47a5992891e873 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 25 Nov 2022 14:57:39 +0000
Subject: [PATCH 035/111] [thresholding] update copyright banners of files I
 have added/changed

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/__init__.py                     | 2 +-
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py   | 2 +-
 .../test_fpgadataflow_thresholding_binary_search.py             | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index dc9a5a349a..0e17726d48 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, Xilinx
+# Copyright (C) 2022, Advanced Micro Devices, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 003dbb2fd9..7df755ae1b 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, Xilinx
+# Copyright (C) 2022, Advanced Micro Devices, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index e2189c4c79..1e3521a610 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, Xilinx
+# Copyright (C) 2022, Advanced Micro Devices, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without

From 71ef39b38d70365f4812cfd6f0d46a1d0198b269 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Thu, 1 Dec 2022 13:12:33 +0000
Subject: [PATCH 036/111] Translate byte to parameter word addressing in AXI
 adapter.

---
 finn-rtllib/thresholding/hdl/thresholding_axi.sv | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 97cdfd3e12..c766e60b9e 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -30,6 +30,12 @@
  *
  * @brief	All-AXI interface adapter for thresholding module.
  * @author	Thomas B. Preußer <tpreusse@amd.com>
+ *
+ * @description
+ *	This AXI adapter fits the core thresholding functionality:
+ *	- with AXI stream data interfaces with flow control
+ *	- with implicit round-robin channel rotation as used by FINN, and
+ *	- performs aligned byte address to parameter word address translation.
  *****************************************************************************/
 
 module $MODULE_NAME_AXI$ #(
@@ -49,7 +55,7 @@ module $MODULE_NAME_AXI$ #(
 	// Writing
 	input	logic                    s_axilite_AWVALID,
 	output	logic                    s_axilite_AWREADY,
-	input	logic [$clog2(C)+N-1:0]  s_axilite_AWADDR,
+	input	logic [$clog2(C)+N+1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
 
 	input	logic         s_axilite_WVALID,
 	output	logic         s_axilite_WREADY,
@@ -109,7 +115,7 @@ module $MODULE_NAME_AXI$ #(
 			else begin
 				if(!WABusy) begin
 					WABusy <= s_axilite_AWVALID;
-					Addr   <= s_axilite_AWADDR[$clog2(C)+N-1:0];
+					Addr   <= s_axilite_AWADDR[$clog2(C)+N+1:2];
 				end
 				if(!WDBusy) begin
 					WDBusy <= s_axilite_WVALID;

From d44a66c949177163099e36ce4e57c9ac992ee70b Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 19 Dec 2022 15:05:08 +0000
Subject: [PATCH 037/111] [thresholding] remove unused attribute

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 7df755ae1b..2ebe6f0a39 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -103,7 +103,6 @@ def get_nodeattr_types(self):
             # always "flush" the accelerator by first passing a dummy input
             # vector through the accelerator. This will get rid of any old
             # weight data from the weight FIFOs.
-            "runtime_writeable_weights": ("i", False, 0, {0, 1}),
             "gen_top_module": ("s", False, ""),
             "activation_bias": ("i", False, 0),
         }
@@ -656,8 +655,6 @@ def get_verilog_top_module_intf_names(self):
         intf_names["s_axis"] = [["s_axis"]]
         intf_names["m_axis"] = [["m_axis"]]
 
-        self.set_nodeattr("runtime_writeable_weights", 1)
-
         return intf_names
 
     def find_next_power_of_2(self, n):

From f79b9ec3e19d83d6469e6e563422fbba70f7a87a Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 19 Dec 2022 15:53:20 +0000
Subject: [PATCH 038/111] [thresholding] remove unnecessary HLS bug prevention
 check

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 ...test_fpgadataflow_thresholding_binary_search.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 1e3521a610..ab98189ea5 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -242,11 +242,6 @@ def test_fpgadataflow_thresholding_binary_search_cppsim():
     thresholds = generate_random_threshold_values(
         input_data_type, num_input_channels, num_steps
     )
-
-    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
-    # threshold of first channel is zero, while using BIPOLAR output)
-    if act == DataType["BIPOLAR"]:
-        thresholds[0][0] = 0
     thresholds = sort_thresholds_increasing(thresholds)
 
     # Other non-input parameters
@@ -350,10 +345,6 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
     thresholds = generate_random_threshold_values(
         input_data_type, num_input_channels, num_steps
     )
-    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
-    # threshold of first channel is zero, while using BIPOLAR output)
-    if act == DataType["BIPOLAR"]:
-        thresholds[0][0] = 0
     thresholds = sort_thresholds_increasing(thresholds)
 
     # Other non-input parameters
@@ -420,11 +411,6 @@ def test_fpgadataflow_thresholding_binary_search(
         input_data_type, num_input_channels, num_steps
     )
 
-    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
-    # threshold of first channel is zero, while using BIPOLAR output)
-    if activation == DataType["BIPOLAR"]:
-        thresholds[0][0] = 0
-
     # provide non-decreasing/ascending thresholds
     thresholds = sort_thresholds_increasing(thresholds)
 

From 7b82de2c78e14f9dc2017e7c5e9378865011e9da Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 19 Dec 2022 16:40:57 +0000
Subject: [PATCH 039/111] [thresholding] align methods with hlscustom class by
 adding in additional input parameter

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../fpgadataflow/thresholding_binary_search.py   | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 2ebe6f0a39..d69c7e47b7 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -129,10 +129,10 @@ def bram_estimation(self):
     def lut_estimation(self):
         return 0
 
-    def get_input_datatype(self):
+    def get_input_datatype(self, ind=0):
         return DataType[self.get_nodeattr("inputDataType")]
 
-    def get_output_datatype(self):
+    def get_output_datatype(self, ind=0):
         return DataType[self.get_nodeattr("outputDataType")]
 
     def get_weight_datatype(self):
@@ -142,11 +142,11 @@ def get_weight_datatype(self):
     def minimize_accumulator_width(self, model):
         return None
 
-    def get_instream_width(self):
+    def get_instream_width(self, ind=0):
         i_bits = self.get_input_datatype().bitwidth()
         return i_bits * self.get_nodeattr("PE")
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, ind=0):
         o_bits = self.get_output_datatype().bitwidth()
         return o_bits * self.get_nodeattr("PE")
 
@@ -163,24 +163,24 @@ def get_weightstream_width(self):
         w_width = pe * wp * n_thres_steps
         return w_width
 
-    def get_folded_input_shape(self):
+    def get_folded_input_shape(self, ind=0):
         fold = self.calc_tmem()
         pe = self.get_nodeattr("PE")
         vecs = list(self.get_nodeattr("numInputVectors"))
         folded_input_shape = tuple(vecs + [fold, pe])
         return folded_input_shape
 
-    def get_folded_output_shape(self):
+    def get_folded_output_shape(self, ind=0):
         # same shape as input
         return self.get_folded_input_shape()
 
-    def get_normal_input_shape(self):
+    def get_normal_input_shape(self, ind=0):
         num_channels = self.get_nodeattr("NumChannels")
         vecs = list(self.get_nodeattr("numInputVectors"))
         normal_input_shape = tuple(vecs + [num_channels])
         return normal_input_shape
 
-    def get_normal_output_shape(self):
+    def get_normal_output_shape(self, ind=0):
         # same shape as input
         return self.get_normal_input_shape()
 

From e2816d3e1c8ce75ad9f0b1aafbef25af8b305a6c Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 19 Dec 2022 16:50:26 +0000
Subject: [PATCH 040/111] [thresholding] replace hardcoded tcl commands with
 node attributes

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index d69c7e47b7..fe976c7dbe 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -105,6 +105,7 @@ def get_nodeattr_types(self):
             # weight data from the weight FIFOs.
             "gen_top_module": ("s", False, ""),
             "activation_bias": ("i", False, 0),
+            "clkFreq": ("i", False, 200000000),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
@@ -604,6 +605,10 @@ def code_generation_ipi(self):
         cmd = []
         rtl_file_list = self.get_rtl_file_list()
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        node_name = self.onnx_node.name
+        dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0][0]
+        din_name = self.get_verilog_top_module_intf_names()["s_axis"][0][0]
+        clock_freq = self.get_nodeattr("clkFreq")
 
         for rtl_file in rtl_file_list:
             cmd.append(
@@ -616,20 +621,14 @@ def code_generation_ipi(self):
             % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)
         )
 
-        # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between
-        # /Thresholding_Binary_Search_0/s_axis(100000000 and
-        # /StreamingFIFO_0/out_V(200000000.000000)
         cmd.append(
-            "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [%s %s]"
-            % ("get_bd_intf_pins", "Thresholding_Binary_Search_0/s_axis")
+            "set_property -dict [list CONFIG.FREQ_HZ {%d}] [%s %s/%s]"
+            % (clock_freq, "get_bd_intf_pins", node_name, din_name)
         )
 
-        # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between
-        # /StreamingFIFO_1/in0_V(200000000.000000) and
-        # /Thresholding_Binary_Search_0/m_axis(100000000)
         cmd.append(
-            "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [%s %s]"
-            % ("get_bd_intf_pins", "Thresholding_Binary_Search_0/m_axis")
+            "set_property -dict [list CONFIG.FREQ_HZ {%d}] [%s %s/%s]"
+            % (clock_freq, "get_bd_intf_pins", node_name, dout_name)
         )
 
         return cmd

From bda05ae16e62627d414c80452caa012dee7aa0d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 20 Dec 2022 09:24:04 +0000
Subject: [PATCH 041/111] Fix BIAS parameter specification.

---
 finn-rtllib/thresholding/hdl/thresholding.sv     | 2 +-
 finn-rtllib/thresholding/hdl/thresholding_axi.sv | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 25d6ff3112..b26747d1ff 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -48,7 +48,7 @@ module $MODULE_NAME$ #(
 	int unsigned  M,  // input/threshold precision
 	int unsigned  C,  // number of channels
 
-	int BIAS,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+	int BIAS,  // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
 
 	int unsigned  C_BITS,
 	int unsigned O_BITS
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index c766e60b9e..5cd7746b82 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -43,7 +43,7 @@ module $MODULE_NAME_AXI$ #(
 	int unsigned  M,	// input/threshold precision
 	int unsigned  C,	// Channels
 
-	int BIAS,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+	int BIAS,  // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
 
 	int unsigned O_BITS
 )(

From 7388e7613ef38b6caa1fafb1129973cefef8716a Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 20 Dec 2022 14:08:07 +0000
Subject: [PATCH 042/111] [thresholding] remove unused ram_style attribute

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../fpgadataflow/thresholding_binary_search.py        | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index fe976c7dbe..9cbe049be3 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -76,8 +76,6 @@ def get_nodeattr_types(self):
             "NumChannels": ("i", True, 0),
             # number of steps in thresholding function. Used only in decoupled mode
             "numSteps": ("i", True, 1),
-            # string defining memory type
-            "ram_style": ("s", False, "distributed", {"distributed", "block"}),
             # FINN DataTypes for inputs, outputs
             "inputDataType": ("s", True, ""),
             "weightDataType": ("s", True, ""),
@@ -470,14 +468,7 @@ def generate_params(self, model, path):
         )
 
         # Synthesis thresholds:
-        ram_style = self.get_nodeattr("ram_style")
-        if ram_style == "ultra":
-            # UltraRAM must have no memory initializer, or only zeroes
-            # otherwise BRAM will be inferred instead of URAM
-            # as a workaround we provide a zero-weight init here
-            synth_thresholds = np.zeros_like(thresholds, dtype=np.float32)
-        else:
-            synth_thresholds = thresholds
+        synth_thresholds = thresholds
         self.make_weight_file(
             synth_thresholds, "decoupled_verilog_dat", weight_filename_rtl_synth
         )

From e965396e4ddf4848fc9a17b04fa4908a0924568e Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 10:40:52 +0000
Subject: [PATCH 043/111] [thresholding] skip test for unsupported cppsim
 configuration and merge tests

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 ...fpgadataflow_thresholding_binary_search.py | 65 +++----------------
 1 file changed, 9 insertions(+), 56 deletions(-)

diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index ab98189ea5..947109794e 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -96,6 +96,7 @@ def make_single_thresholding_binary_search_modelwrapper(
     mem_mode,
     num_input_vecs,
 ):
+
     NumChannels = thresholds.shape[0]
 
     inp = helper.make_tensor_value_info(
@@ -223,61 +224,6 @@ def test_fpgadataflow_thresholding_binary_search_unit():
     return
 
 
-# Test brief: Prove that cppsim is not supported for this class
-@pytest.mark.fpgadataflow
-@pytest.mark.vivado
-def test_fpgadataflow_thresholding_binary_search_cppsim():
-    input_data_type = DataType["UINT16"]
-    act = DataType["BIPOLAR"]
-    fold = -1
-    num_input_channels = 16
-    # 'const' is unsupported see test:
-    # test_fpgadataflow_thresholding_binary_search_const_mem_mode()
-    mem_mode = "decoupled"
-
-    pe = generate_pe_value(fold, num_input_channels)
-    num_steps = act.get_num_possible_values() - 1
-
-    # Generate random, non-decreasing thresholds
-    thresholds = generate_random_threshold_values(
-        input_data_type, num_input_channels, num_steps
-    )
-    thresholds = sort_thresholds_increasing(thresholds)
-
-    # Other non-input parameters
-    num_input_vecs = [1, 2, 2]
-    output_data_type = act
-    if output_data_type == DataType["BIPOLAR"]:
-        activation_bias = 0
-    else:
-        activation_bias = output_data_type.min()
-
-    # Generate model from input parameters to the test
-    model = make_single_thresholding_binary_search_modelwrapper(
-        thresholds,
-        pe,
-        input_data_type,
-        output_data_type,
-        activation_bias,
-        mem_mode,
-        num_input_vecs,
-    )
-
-    # Cppsim is not supported for this class, catch the specific exception thrown by
-    # cppsim. Exception raised in cppsim: Custom op_type Thresholding_Binary_Search is
-    # currently not supported.
-    try:
-        model = model.transform(PrepareCppSim())
-        model = model.transform(CompileCppSim())
-        model = model.transform(SetExecMode("cppsim"))
-    except Exception as e:
-        if (
-            str(e)
-            != "Custom op_type Thresholding_Binary_Search is currently not supported."
-        ):
-            raise
-
-
 # Test brief: Prove that memory mode 'const' is not supported for this layer type
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
@@ -384,16 +330,23 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
 # no need to test 'const' mode, it's already done in:
 # test_fpgadataflow_thresholding_binary_search_const_mem_mode()
 @pytest.mark.parametrize("mem_mode", ["decoupled"])
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
 @pytest.mark.slow
 def test_fpgadataflow_thresholding_binary_search(
-    activation, input_data_type, fold, num_input_channels, mem_mode
+    activation, input_data_type, fold, num_input_channels, mem_mode, exec_mode
 ):
     # Handle inputs to the test
     pe = generate_pe_value(fold, num_input_channels)
     num_steps = activation.get_num_possible_values() - 1
 
+    # Cppsim is not supported for this node (as it is an RTL node)
+    if exec_mode == "cppsim":
+        pytest.skip("cppsim not supported for RTL Thresholding Binary Search node")
+    elif exec_mode != "rtlsim":
+        raise Exception("Unknown exec_mode: {}".format(exec_mode))
+
     # Other non-input parameters
     num_input_vecs = [1, 2, 2]
     output_data_type = activation

From 2b8a674573e3415e54665ff05a2db75d5c20f30f Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 11:07:40 +0000
Subject: [PATCH 044/111] [thresholding] moving find_next_power_of_2() to the
 util suite

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/util/basic.py                        | 17 ++++
 ...fpgadataflow_thresholding_binary_search.py | 82 -------------------
 tests/util/test_basic.py                      | 62 ++++++++++++++
 3 files changed, 79 insertions(+), 82 deletions(-)
 create mode 100755 tests/util/test_basic.py

diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 4aba87216c..9a66cf90eb 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -216,3 +216,20 @@ def is_exe(fpath):
                 return exe_file
 
     return None
+
+def find_next_power_of_2(n):
+    # Smallest power of 2 that is >= n, but never less than 2 (see n == 1 below).
+    if n <= 0:
+        return 0  # guard: for n <= 0 the bit-clearing loop below never terminates
+    # n == 1 would leave the code below with 0 (0 << 1), so answer 2 directly.
+    elif n == 1:
+        return 2  # i.e. 2**1
+
+    # decrement 'n' so that an exact power of 2 maps to itself rather than doubling
+    n = n - 1
+
+    # clear low set bits until only the most significant set bit remains
+    while n & n - 1:
+        # n & (n - 1) drops the lowest set bit ('-' binds tighter than '&')
+        n = n & n - 1
+    return n << 1  # double the surviving top bit -> next power of 2
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 947109794e..29fc2828b6 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -142,88 +142,6 @@ def make_single_thresholding_binary_search_modelwrapper(
     return model
 
 
-# Test brief: a particular method for this class was causing a bug:
-# find_next_power_of_2()
-# Weights in the thresholding core are programmed on a per-channel basis and are
-# byte-addressable. When a channel is programmed, the next channel can start
-# programming at the next power-of-2 byte boundary. This test is to show that the
-# function that calculates that boundary is working correctly.
-#
-# A Thresholding_Binary_Search layer was created and a SW generated dataset with a
-# threshold channel depth of 1 weight (1 layer of N channels in the thresholding core).
-# However, find_next_power_of_2() was returning a next-power-of-2 address boundary at
-# address '0', instead of '2'. This unit test is to prove that this bug no longer
-# occurs. It was originally seen when the input datatype was 'DataType["BIPOLAR"]'.
-@pytest.mark.fpgadataflow
-@pytest.mark.vivado
-def test_fpgadataflow_thresholding_binary_search_unit():
-    activation = DataType["BIPOLAR"]
-    input_data_type = DataType["INT16"]
-    fold = -1
-    num_input_channels = 16
-    mem_mode = "decoupled"
-
-    # Handle inputs to the test
-    pe = generate_pe_value(fold, num_input_channels)
-    num_steps = activation.get_num_possible_values() - 1
-
-    # Other non-input parameters
-    num_input_vecs = [1, 2, 2]
-    output_data_type = activation
-    if output_data_type == DataType["BIPOLAR"]:
-        activation_bias = 0
-    else:
-        activation_bias = output_data_type.min()
-
-    # Generate random thresholds and sort in ascending order
-    thresholds = generate_random_threshold_values(
-        input_data_type, num_input_channels, num_steps
-    )
-
-    # Generate model from input parameters to the test
-    model = make_single_thresholding_binary_search_modelwrapper(
-        thresholds,
-        pe,
-        input_data_type,
-        output_data_type,
-        activation_bias,
-        mem_mode,
-        num_input_vecs,
-    )
-
-    # Retrieve the class to get the method-under-test
-    tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
-    tbs_inst = getCustomOp(tbs_node)
-
-    test_vector = [
-        {"input": -2, "expected_result": 0},
-        {"input": -1, "expected_result": 0},
-        {"input": 0, "expected_result": 0},
-        {"input": 1, "expected_result": 2},
-        {"input": 2, "expected_result": 2},
-        {"input": 3, "expected_result": 4},
-        {"input": 4, "expected_result": 4},
-        {"input": 7, "expected_result": 8},
-        {"input": 8, "expected_result": 8},
-        {"input": 11, "expected_result": 16},
-        {"input": 15, "expected_result": 16},
-        {"input": 16, "expected_result": 16},
-        {"input": 18, "expected_result": 32},
-        {"input": 27, "expected_result": 32},
-        {"input": 31, "expected_result": 32},
-        {"input": 32, "expected_result": 32},
-        {"input": 42, "expected_result": 64},
-        {"input": 65, "expected_result": 128},
-    ]
-
-    for test_dict in test_vector:
-        output = tbs_inst.find_next_power_of_2(test_dict["input"])
-        assert output >= test_dict["input"]
-        assert output == test_dict["expected_result"]
-
-    return
-
-
 # Test brief: Prove that memory mode 'const' is not supported for this layer type
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
diff --git a/tests/util/test_basic.py b/tests/util/test_basic.py
new file mode 100755
index 0000000000..d2586f4f19
--- /dev/null
+++ b/tests/util/test_basic.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2023, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import finn.util.basic as basic
+
+
+@pytest.mark.util
+def test_next_power_of_2():
+    test_vector = [
+        {"input": -2, "expected_result": 0},
+        {"input": -1, "expected_result": 0},
+        {"input": 0, "expected_result": 0},
+        {"input": 1, "expected_result": 2},
+        {"input": 2, "expected_result": 2},
+        {"input": 3, "expected_result": 4},
+        {"input": 4, "expected_result": 4},
+        {"input": 7, "expected_result": 8},
+        {"input": 8, "expected_result": 8},
+        {"input": 11, "expected_result": 16},
+        {"input": 15, "expected_result": 16},
+        {"input": 16, "expected_result": 16},
+        {"input": 18, "expected_result": 32},
+        {"input": 27, "expected_result": 32},
+        {"input": 31, "expected_result": 32},
+        {"input": 32, "expected_result": 32},
+        {"input": 42, "expected_result": 64},
+        {"input": 65, "expected_result": 128},
+    ]
+
+    for test_dict in test_vector:
+        output = basic.find_next_power_of_2(test_dict["input"])
+        assert output >= test_dict["input"]
+        assert output == test_dict["expected_result"]
+
+    return

From 45bb19f2821bde10cf7303a193869160fd46c72e Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 11:22:08 +0000
Subject: [PATCH 045/111] [thresholding] remove find_next_power_of_2() from
 thresholding binary search CustomOp class

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 21 ++-----------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 9cbe049be3..c681bb2631 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -38,7 +38,7 @@
 )
 
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.util.basic import get_rtlsim_trace_depth, make_build_dir
+from finn.util.basic import get_rtlsim_trace_depth, make_build_dir, find_next_power_of_2
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     pack_innermost_dim_as_hex_string,
@@ -647,23 +647,6 @@ def get_verilog_top_module_intf_names(self):
 
         return intf_names
 
-    def find_next_power_of_2(self, n):
-        # Negative values will loop infinitely below - return 0
-        if n <= 0:
-            return 0
-        # If '1' is requested, output will be '0' in the loop below, avoid this now.
-        elif n == 1:
-            return 2  # i.e. 2**1
-
-        # decrement 'n' (to handle cases when `n` itself is a power of 2)
-        n = n - 1
-
-        # loop until only one bit is left
-        while n & n - 1:
-            # unset rightmost bit
-            n = n & n - 1
-        return n << 1
-
     def twos_comp(self, val, bitwidth):
         return (val + (1 << bitwidth)) % (1 << bitwidth)
 
@@ -678,7 +661,7 @@ def get_dynamic_config(self, model, address_stride=1):
         thresholds = model.get_initializer(self.onnx_node.input[1])
         num_channels, num_weights_per_channel = thresholds.shape
 
-        weight_addr_boundary = self.find_next_power_of_2(num_weights_per_channel)
+        weight_addr_boundary = find_next_power_of_2(num_weights_per_channel)
         # Make sure that the next power of 2 (output) is greater than the input
         assert weight_addr_boundary >= num_weights_per_channel
 

From ca0042225c006d4545e26b0e0f1221ecd4ab68c3 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 12:58:01 +0000
Subject: [PATCH 046/111] [thresholding] replace math functions with existing
 functions

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../custom_op/fpgadataflow/thresholding_binary_search.py  | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index c681bb2631..9113e4f9d9 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -647,12 +647,6 @@ def get_verilog_top_module_intf_names(self):
 
         return intf_names
 
-    def twos_comp(self, val, bitwidth):
-        return (val + (1 << bitwidth)) % (1 << bitwidth)
-
-    def prep_axilite_val(self, val):
-        return self.twos_comp(int(val), self.get_weight_datatype().bitwidth())
-
     def get_dynamic_config(self, model, address_stride=1):
         """Returns a configuration dictionary containing axilite write commands
         in order to program the thresholds into the RTL core during runtime.
@@ -677,7 +671,7 @@ def get_dynamic_config(self, model, address_stride=1):
                 )
                 config[key_name] = (
                     channel_start_addr + addr,
-                    self.prep_axilite_val(weight),
+                    int(str(pack_innermost_dim_as_hex_string([weight], self.get_weight_datatype(), self.get_weight_datatype().bitwidth())), 0),
                 )
 
                 weight_cntr += 1

From 7f3455fc0d1dafedaf8cdfca8144dea41747a624 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 13:16:11 +0000
Subject: [PATCH 047/111] [thresholding] remove concept of mem_mode for RTL
 thresholding binary search node

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 38 ------------
 ...fpgadataflow_thresholding_binary_search.py | 58 +------------------
 2 files changed, 1 insertion(+), 95 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 9113e4f9d9..954850562e 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -88,19 +88,6 @@ def get_nodeattr_types(self):
             # [4] is four vectors (like a FC layer with batch=4)
             # [1, 4, 4] is four * four vectors (like a conv layer with batch=1)
             "numInputVectors": ("ints", False, [1]),
-            # memory mode for the thresholds
-            # const -- embedded thresholds, default
-            # decoupled -- streaming thresholds with streamer packaged inside IP
-            "mem_mode": ("s", False, "const", {"const", "decoupled"}),
-            # (mem_mode = decoupled only) whether weights (thresholds) will be
-            # writable through an AXI-lite interface during runtime
-            # 1 for enabled, 0 for disabled.
-            # see finn-rtllib/memstream/doc/README for more about the memory
-            # address map used for writable weights
-            # IMPORTANT: After using AXI lite to either read or write the weights,
-            # always "flush" the accelerator by first passing a dummy input
-            # vector through the accelerator. This will get rid of any old
-            # weight data from the weight FIFOs.
             "gen_top_module": ("s", False, ""),
             "activation_bias": ("i", False, 0),
             "clkFreq": ("i", False, 200000000),
@@ -150,12 +137,6 @@ def get_outstream_width(self, ind=0):
         return o_bits * self.get_nodeattr("PE")
 
     def get_weightstream_width(self):
-        # Only 'decoupled' mode is supported
-        mem_mode = self.get_nodeattr("mem_mode")
-        if mem_mode != "decoupled":
-            raise Exception(
-                "Unrecognized memory mode for this node: {}".format(mem_mode)
-            )
         pe = self.get_nodeattr("PE")
         wp = self.get_weight_datatype().bitwidth()
         n_thres_steps = self.get_nodeattr("numSteps")
@@ -442,13 +423,6 @@ def code_generation_ipgen(self, model, fpgapart, clk):
         self.generate_params(model, code_gen_dir)
 
     def generate_params(self, model, path):
-        # Only 'decoupled' mode is supported
-        mem_mode = self.get_nodeattr("mem_mode")
-        if mem_mode != "decoupled":
-            raise Exception(
-                "Unrecognized memory mode for this node: {}".format(mem_mode)
-            )
-
         code_gen_dir = path
         weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir)
         thresholds = model.get_initializer(self.onnx_node.input[1])
@@ -508,12 +482,6 @@ def execute_node(self, context, graph):
                     self.get_nodeattr("exec_mode"), "rtlsim"
                 )
             )
-        if self.get_nodeattr("mem_mode") != "decoupled":
-            raise Exception(
-                "Invalid mem_mode value: {}; mem_mode must be set to '{}'".format(
-                    self.get_nodeattr("mem_mode"), "decoupled"
-                )
-            )
 
         node = self.onnx_node
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
@@ -635,12 +603,6 @@ def get_verilog_top_module_intf_names(self):
         Each block must have at most one aximm and one axilite."""
 
         intf_names = super().get_verilog_top_module_intf_names()
-        # Only 'decoupled' mode is supported - check before adding axilite interface
-        mem_mode = self.get_nodeattr("mem_mode")
-        if mem_mode != "decoupled":
-            raise Exception(
-                "Unrecognized memory mode for this node: {}".format(mem_mode)
-            )
         intf_names["axilite"] = ["s_axilite"]
         intf_names["s_axis"] = [["s_axis"]]
         intf_names["m_axis"] = [["m_axis"]]
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 29fc2828b6..7ef5da8f23 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -93,7 +93,6 @@ def make_single_thresholding_binary_search_modelwrapper(
     input_data_type,
     output_data_type,
     activation_bias,
-    mem_mode,
     num_input_vecs,
 ):
 
@@ -121,7 +120,6 @@ def make_single_thresholding_binary_search_modelwrapper(
         weightDataType=input_data_type.name,
         outputDataType=output_data_type.name,
         activation_bias=activation_bias,
-        mem_mode=mem_mode,
         numInputVectors=num_input_vecs,
     )
     graph = helper.make_graph(
@@ -142,54 +140,6 @@ def make_single_thresholding_binary_search_modelwrapper(
     return model
 
 
-# Test brief: Prove that memory mode 'const' is not supported for this layer type
-@pytest.mark.fpgadataflow
-@pytest.mark.vivado
-def test_fpgadataflow_thresholding_binary_search_const_mem_mode():
-    input_data_type = DataType["INT16"]
-    activation = DataType["INT4"]
-    fold = -1
-    num_input_channels = 16
-    mem_mode = "const"
-
-    pe = generate_pe_value(fold, num_input_channels)
-    num_input_vecs = [1, 2, 2]
-    output_data_type = activation
-    activation_bias = output_data_type.min()
-
-    # Generate random thresholds and sort in ascending order
-    num_steps = activation.get_num_possible_values() - 1
-    thresholds = generate_random_threshold_values(
-        input_data_type, num_input_channels, num_steps
-    )
-
-    # Generate model from input parameters to the test
-    model = make_single_thresholding_binary_search_modelwrapper(
-        thresholds,
-        pe,
-        input_data_type,
-        output_data_type,
-        activation_bias,
-        mem_mode,
-        num_input_vecs,
-    )
-
-    # Prove that 'const' memory mode is not supported for this class
-    # 'const' memory mode is not supported for this class, catch the specific exception
-    # thrown by FINN. Exception: ('Unrecognized memory mode for this node:', 'const')
-    try:
-        model = model.transform(InsertFIFO(True))
-        model = model.transform(GiveUniqueNodeNames())
-        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
-        model = model.transform(HLSSynthIP())
-        model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
-    except Exception as e:
-        if str(e) != "Unrecognized memory mode for this node: {}".format(mem_mode):
-            raise
-        # Caught the expected exception, leave the test early
-        return
-
-
 # Test brief: Test that PrepareRTLSim() runs successfully. This function is not
 # tested in test_fpgadataflow_thresholding_binary_search()
 @pytest.mark.fpgadataflow
@@ -199,7 +149,6 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
     act = DataType["INT4"]
     fold = -1
     num_input_channels = 16
-    mem_mode = "decoupled"
 
     # Handle inputs to the test
     pe = generate_pe_value(fold, num_input_channels)
@@ -226,7 +175,6 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
         input_data_type,
         output_data_type,
         activation_bias,
-        mem_mode,
         num_input_vecs,
     )
 
@@ -245,15 +193,12 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
 @pytest.mark.parametrize("fold", [-1])  # 1, 2, etc. will fail
 @pytest.mark.parametrize("num_input_channels", [16])
-# no need to test 'const' mode, it's already done in:
-# test_fpgadataflow_thresholding_binary_search_const_mem_mode()
-@pytest.mark.parametrize("mem_mode", ["decoupled"])
 @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
 @pytest.mark.slow
 def test_fpgadataflow_thresholding_binary_search(
-    activation, input_data_type, fold, num_input_channels, mem_mode, exec_mode
+    activation, input_data_type, fold, num_input_channels, exec_mode
 ):
     # Handle inputs to the test
     pe = generate_pe_value(fold, num_input_channels)
@@ -304,7 +249,6 @@ def test_fpgadataflow_thresholding_binary_search(
         input_data_type,
         output_data_type,
         activation_bias,
-        mem_mode,
         num_input_vecs,
     )
 

From 4bc69f1a374821b16b80826946223a0a36cae787 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 16:20:02 +0000
Subject: [PATCH 048/111] [thresholding] add methods needed for convertingToHls
 transformation

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 40 +++++++++++++++++--
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 954850562e..c342d235d9 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -101,10 +101,23 @@ def calc_tmem(self):
         return num_channels // pe
 
     def make_shape_compatible_op(self, model):
-        return []
+        oshape = self.get_normal_output_shape()
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
-        return
+        node = self.onnx_node
+        idt = model.get_tensor_datatype(node.input[0])
+        if idt != self.get_input_datatype():
+            warn_str = "inputDataType changing for %s: %s -> %s " % (
+                node.name,
+                str(self.get_input_datatype().name),
+                str(idt.name),
+            )
+            warnings.warn(warn_str)
+        self.set_nodeattr("inputDataType", idt.name)
+        # set output datatype from property
+        odt = self.get_output_datatype()
+        model.set_tensor_datatype(node.output[0], odt)
 
     def verify_node(self):
         return []
@@ -126,7 +139,28 @@ def get_weight_datatype(self):
         return DataType[self.get_nodeattr("weightDataType")]
 
     def minimize_accumulator_width(self, model):
-        return None
+        "Minimize threshold width ('accumulator width' here due to convention)"
+        thresholds = model.get_initializer(self.onnx_node.input[1])
+        threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
+        min_threshold = thresholds.min()
+        max_threshold = thresholds.max()
+        min_input = self.get_input_datatype().min()
+        max_input = self.get_input_datatype().max()
+        # get range required by threshold values
+        tdt_min = min(min_input, min_threshold)
+        tdt_max = max(max_input, max_threshold)
+        if tdt_min < 0:
+            if abs(tdt_min) > tdt_max:
+                tdt = DataType.get_smallest_possible(tdt_min)
+            else:
+                tdt = DataType.get_smallest_possible(-tdt_max - 1)
+        else:
+            tdt = DataType.get_smallest_possible(tdt_max)
+        assert np.vectorize(tdt.allowed)(
+            threshold_tensor
+        ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
+        self.set_nodeattr("weightDataType", tdt.name)
+        return DataType[self.get_nodeattr("weightDataType")]
 
     def get_instream_width(self, ind=0):
         i_bits = self.get_input_datatype().bitwidth()

From 3b6a1980b8ac28f5a809125d1e06eeb5ab2ba3b5 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 16:32:23 +0000
Subject: [PATCH 049/111] [thresholding] add convertingToHls transformation for
 thresholding binary search RTL node

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../fpgadataflow/convert_to_hls_layers.py     | 93 +++++++++++++++----
 1 file changed, 73 insertions(+), 20 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 525af7ea92..17f839c5c5 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -1062,9 +1062,10 @@ def apply(self, model):
 class InferThresholdingLayer(Transformation):
     """Convert any MultiThreshold into a standalone thresholding HLS layer."""
 
-    def __init__(self, mem_mode="const"):
+    def __init__(self, mem_mode="const", use_rtl_variant=False):
         super().__init__()
         self.mem_mode = mem_mode
+        self.use_rtl_variant = use_rtl_variant
 
     def apply(self, model):
         graph = model.graph
@@ -1118,26 +1119,78 @@ def apply(self, model):
                 )
                 actval = int(actval)
                 assert (not odt.signed()) or (actval < 0), (
-                    node.name + ": Signed output requres actval < 0"
-                )
-                # create and insert new Thresholding_Batch node
-                new_node = helper.make_node(
-                    "Thresholding_Batch",
-                    [thl_input, thl_threshold],
-                    [thl_output],
-                    domain="finn.custom_op.fpgadataflow",
-                    backend="fpgadataflow",
-                    NumChannels=ifc,
-                    PE=pe,
-                    numSteps=thl_thres_shape[1],
-                    inputDataType=idt.name,
-                    weightDataType=idt.name,  # will be set by MinimizeAccumulatorWidth
-                    outputDataType=odt.name,
-                    numInputVectors=list(thl_in_shape[:-1]),
-                    ActVal=actval,
-                    mem_mode=self.mem_mode,
-                    name="Thresholding_Batch_" + node.name,
+                    node.name + ": Signed output requires actval < 0"
                 )
+
+                # Ensure that RTL variant is not inserted for unsupported configuration
+                is_rtl_variant_compatible = True
+
+                # Perform checks for RTL variant if chosen
+                if self.use_rtl_variant:
+                    # Check memory mode
+                    if self.mem_mode != "decoupled":
+                        warnings.warn(
+                            """%s : RTL Thresholding does not support 'decoupled' memory mode.
+                            Falling back to HLS implementation."""
+                            % node.name
+                        )
+                        is_rtl_variant_compatible = False
+
+                    # Check PE/SIMD value
+                    if pe != 1:
+                        warnings.warn(
+                            """%s : RTL Thresholding does not support paralellisation.
+                            Only a PE value of 1 is supported.
+                            Falling back to HLS implementation."""
+                            % node.name
+                        )
+                        is_rtl_variant_compatible = False
+
+                if self.use_rtl_variant and is_rtl_variant_compatible:
+                    new_node = helper.make_node(
+                        "Thresholding_Binary_Search",
+                        [thl_input, thl_threshold],
+                        [thl_output],
+                        domain="finn.custom_op.fpgadataflow",
+                        backend="fpgadataflow",
+                        NumChannels=ifc,
+                        PE=pe,
+                        numSteps=thl_thres_shape[1],
+                        inputDataType=idt.name,
+                        weightDataType=idt.name,  # will be set by MinimizeAccumulatorWidth
+                        outputDataType=odt.name,
+                        numInputVectors=list(thl_in_shape[:-1]),
+                        activation_bias=actval,
+                        mem_mode=self.mem_mode,
+                        name="Thresholding_Binary_Search_" + node.name,
+                    )
+                else:
+                    if self.use_rtl_variant:
+                        warnings.warn(
+                        """%s : RTL Thresholding requested for unsupported
+                            configuration. Falling back to HLS implementation."""
+                        % node.name
+                    )
+
+                    # create and insert new Thresholding_Batch node
+                    new_node = helper.make_node(
+                        "Thresholding_Batch",
+                        [thl_input, thl_threshold],
+                        [thl_output],
+                        domain="finn.custom_op.fpgadataflow",
+                        backend="fpgadataflow",
+                        NumChannels=ifc,
+                        PE=pe,
+                        numSteps=thl_thres_shape[1],
+                        inputDataType=idt.name,
+                        weightDataType=idt.name,  # will be set by MinimizeAccumulatorWidth
+                        outputDataType=odt.name,
+                        numInputVectors=list(thl_in_shape[:-1]),
+                        ActVal=actval,
+                        mem_mode=self.mem_mode,
+                        name="Thresholding_Batch_" + node.name,
+                    )
+
                 graph.node.insert(insert_point, new_node)
                 # remove old node
                 graph.node.remove(node)

From b3800cd7e258cecb0466cb9238eeb37ff738d660 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 16:34:09 +0000
Subject: [PATCH 050/111] [thresholding] add test for convertingToHls
 transformation for thresholding binary search node

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../test_convert_to_hls_thresholding.py       | 322 ++++++++++++++++++
 1 file changed, 322 insertions(+)
 create mode 100755 tests/fpgadataflow/test_convert_to_hls_thresholding.py

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
new file mode 100755
index 0000000000..30932638b6
--- /dev/null
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -0,0 +1,322 @@
+# Copyright (C) 2023, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+from pyverilator.util.axi_utils import axilite_write, reset_rtlsim
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.multithreshold import multithreshold
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
+
+from finn.core.rtlsim_exec import rtlsim_exec
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+
+test_fpga_part = "xczu3eg-sbva484-1-e"
+target_clk_ns = 5
+
+
+# Helper functions
+def sort_thresholds_increasing(thresholds):
+    return np.sort(thresholds, axis=1)
+
+
+def generate_random_threshold_values(input_data_type, num_input_channels, num_steps):
+    return np.random.randint(
+        input_data_type.min(),
+        input_data_type.max() + 1,
+        (num_input_channels, num_steps),
+    ).astype(np.float32)
+
+
+def generate_pe_value(fold, num_input_channels):
+    if fold == -1:
+        fold = num_input_channels
+    pe = num_input_channels // fold
+    assert num_input_channels % pe == 0
+    return pe
+
+
+# n = batch, c = channel, h = height, w = width of feature map
+# Standard = NCHW; FINN = NHWC
+# Convert from NCHW to NHWC
+def convert_np_array_to_finn_data_layout(data):
+    return np.transpose(data, (0, 2, 3, 1))
+
+
+# n = batch, c = channel, h = height, w = width of feature map
+# Standard = NCHW; FINN = NHWC
+# Convert from NHWC to NCHW
+def convert_np_array_to_standard_data_layout(data):
+    return np.transpose(data, (0, 3, 1, 2))
+
+
+def make_single_thresholding_binary_search_modelwrapper(
+    thresholds,
+    pe,
+    input_data_type,
+    output_data_type,
+    activation_bias,
+    num_input_vecs,
+):
+    NumChannels = thresholds.shape[0]
+
+    inp = helper.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+
+    node_inp_list = ["inp", "thresh"]
+
+    Thresholding_node = helper.make_node(
+        "Thresholding_Binary_Search",
+        node_inp_list,
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        NumChannels=NumChannels,
+        PE=pe,
+        numSteps=thresholds.shape[1],
+        inputDataType=input_data_type.name,
+        weightDataType=input_data_type.name,
+        outputDataType=output_data_type.name,
+        numInputVectors=num_input_vecs,
+        activation_bias=activation_bias,
+    )
+    graph = helper.make_graph(
+        nodes=[Thresholding_node],
+        name="thresholding_graph",
+        inputs=[inp],
+        outputs=[outp],
+    )
+
+    model = helper.make_model(graph, producer_name="thresholding-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", input_data_type)
+    model.set_tensor_datatype("outp", output_data_type)
+
+    model.set_tensor_datatype("thresh", input_data_type)
+    model.set_initializer("thresh", thresholds)
+    return model
+
+
+def make_single_multithresholding_modelwrapper(
+    thresholds,
+    pe,
+    input_data_type,
+    output_data_type,
+    activation_bias,
+    num_input_vecs,
+):
+    NumChannels = thresholds.shape[0]
+
+    inp = helper.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+
+    node_inp_list = ["inp", "thresh"]
+
+    Multithresholding_node = helper.make_node(
+        "MultiThreshold",
+        node_inp_list,
+        ["outp"],
+        domain="qonnx.custom_op.general",
+        out_dtype=output_data_type.name,
+        out_bias=float(activation_bias),
+        out_scale=1.0,
+    )
+
+    graph = helper.make_graph(
+        nodes=[Multithresholding_node],
+        name="multithresholding_graph",
+        inputs=[inp],
+        outputs=[outp],
+    )
+
+    model = helper.make_model(graph, producer_name="multithresholding-model")
+    model = ModelWrapper(model)
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    model = model.transform(GiveUniqueNodeNames())
+
+    model.set_tensor_datatype("inp", input_data_type)
+    model.set_tensor_datatype("outp", output_data_type)
+
+    model.set_tensor_datatype("thresh", input_data_type)
+    model.set_initializer("thresh", thresholds)
+    return model
+
+
+@pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
+@pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
+@pytest.mark.parametrize("fold", [-1])
+@pytest.mark.parametrize("num_input_channels", [16])
+@pytest.mark.parametrize("mem_mode", ["decoupled", "const"])
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
+def test_convert_to_hls_tbs_rtl_variant(activation, input_data_type, fold, num_input_channels, mem_mode):
+    # Handle inputs to the test
+    pe = generate_pe_value(fold, num_input_channels)
+    num_steps = activation.get_num_possible_values() - 1
+
+    # Cppsim is not supported for this node (as it is an RTL node)
+    if mem_mode == "const":
+        pytest.skip("const memory mode not supported for RTL Thresholding Binary Search node")
+    elif mem_mode != "decoupled":
+        raise Exception("Unknown mem_mode: {}".format(mem_mode))
+
+    if activation == DataType["BIPOLAR"]:
+        pytest.skip("Only negative activations are supported for RTL Thresholding Binary Search node")
+
+    # Other non-input parameters
+    num_input_vecs = [1, 2, 2]
+    output_data_type = activation
+    if output_data_type == DataType["BIPOLAR"]:
+        activation_bias = 0
+    else:
+        activation_bias = output_data_type.min()
+
+    # generate random input data
+    tensor_shape = tuple(num_input_vecs + [num_input_channels])
+    x = gen_finn_dt_tensor(input_data_type, tensor_shape)
+
+    # Generate random thresholds and sort in ascending order
+    thresholds = generate_random_threshold_values(
+        input_data_type, num_input_channels, num_steps
+    )
+
+    # provide non-decreasing/ascending thresholds
+    thresholds = sort_thresholds_increasing(thresholds)
+
+    x_nhwc = convert_np_array_to_standard_data_layout(x)
+    y = multithreshold(x_nhwc, thresholds)
+
+    # convert back to NHWC for comparison to hw outputs
+    y = convert_np_array_to_finn_data_layout(y)
+    if activation == DataType["BIPOLAR"]:
+        # binary to bipolar
+        y = 2 * y - 1
+    else:
+        # signed offset
+        y += activation.min()
+
+    # Generate model from input parameters to the test
+    model = make_single_thresholding_binary_search_modelwrapper(
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        num_input_vecs,
+    )
+
+    model = model.transform(InsertFIFO(True))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+
+    # Retrieve the axilite programming sequence for weights - for decoupled mode only
+    tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
+    tbs_inst = getCustomOp(tbs_node)
+    config = tbs_inst.get_dynamic_config(model)
+
+    # Reshape generated data (not from model)
+    oshape = model.get_tensor_shape("outp")
+    y_expected = y.reshape(oshape)
+
+    # Helper function that delivers the hook to program the thresholds via AXI-Lite
+    def config_hook(config):
+        if config is None:
+            return None
+
+        def write_thresh_config(sim):
+            # axi_name = "s_axilite_0_" # works
+            axi_name = getCustomOp(
+                model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
+            ).get_verilog_top_module_intf_names()["axilite"][0]
+            axi_name += "_0_"
+
+            # Write config registers to the Threshold memory.
+            # The dictionary defines (addr, value) tuples.
+            for config_entry in config.values():
+                addr = config_entry[0]
+                val = config_entry[1]
+                axilite_write(sim, addr, val, basename=axi_name)
+
+            reset_rtlsim(sim)
+
+        return write_thresh_config
+
+    input_dict = {"inp": x}
+    rtlsim_exec(model, input_dict, pre_hook=config_hook(config))
+    y_produced = input_dict["outp"]
+    assert (y_produced == y_expected).all()
+
+    #### Make a Multithreshold graph and convert to thresholding binary search node
+    new_model = make_single_multithresholding_modelwrapper(
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        num_input_vecs,
+    )
+
+    # Recreate the model using the ConvertToHLS transform
+    new_model = new_model.transform(to_hls.InferThresholdingLayer(mem_mode=mem_mode, use_rtl_variant=True))
+    new_model = new_model.transform(InsertFIFO(True))
+    new_model = new_model.transform(GiveUniqueNodeNames())
+    new_model = new_model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    new_model = new_model.transform(HLSSynthIP())
+    new_model = new_model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+
+    input_dict = {"inp": x}
+    rtlsim_exec(new_model, input_dict, pre_hook=config_hook(config))
+    y_produced_new = input_dict["outp"]
+    assert (y_produced_new == y_expected).all()

From 11464d87c4857dd2227935c198adbb6115250fe3 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 16:35:32 +0000
Subject: [PATCH 051/111] [thresholding] skip tests with unsupported folding
 factor input

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/fpgadataflow/test_convert_to_hls_thresholding.py      | 6 +++++-
 .../test_fpgadataflow_thresholding_binary_search.py         | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 30932638b6..3b56f40d9c 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -194,7 +194,7 @@ def make_single_multithresholding_modelwrapper(
 
 @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
-@pytest.mark.parametrize("fold", [-1])
+@pytest.mark.parametrize("fold", [-1, 1, 2])
 @pytest.mark.parametrize("num_input_channels", [16])
 @pytest.mark.parametrize("mem_mode", ["decoupled", "const"])
 @pytest.mark.fpgadataflow
@@ -213,6 +213,10 @@ def test_convert_to_hls_tbs_rtl_variant(activation, input_data_type, fold, num_i
     if activation == DataType["BIPOLAR"]:
         pytest.skip("Only negative activations are supported for RTL Thresholding Binary Search node")
 
+    # Paralellisation not supported for thresholding binary search rtl node
+    if pe != 1:
+        pytest.skip("Paralellisation of IP not supported for RTL Thresholding Binary Search node")
+
     # Other non-input parameters
     num_input_vecs = [1, 2, 2]
     output_data_type = activation
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 7ef5da8f23..0be91a2569 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -191,7 +191,7 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
 # N.B. - fold factor of '-1' is supported only (no PE/SIMD support)
 @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
-@pytest.mark.parametrize("fold", [-1])  # 1, 2, etc. will fail
+@pytest.mark.parametrize("fold", [-1, 1, 2])
 @pytest.mark.parametrize("num_input_channels", [16])
 @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 @pytest.mark.fpgadataflow
@@ -204,6 +204,10 @@ def test_fpgadataflow_thresholding_binary_search(
     pe = generate_pe_value(fold, num_input_channels)
     num_steps = activation.get_num_possible_values() - 1
 
+    # Paralellisation not supported for thresholding binary search rtl node
+    if pe != 1:
+        pytest.skip("Paralellisation of IP not supported for RTL Thresholding Binary Search node")
+
     # Cppsim is not supported for this node (as it is an RTL node)
     if exec_mode == "cppsim":
         pytest.skip("cppsim not supported for RTL Thresholding Binary Search node")

From e71b1c0e1487befd8ec04ac6ebcc0caf8d63b4a3 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 16:45:16 +0000
Subject: [PATCH 052/111] [thresholding] add comments for attributes

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index c342d235d9..711e3a8270 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -88,8 +88,12 @@ def get_nodeattr_types(self):
             # [4] is four vectors (like a FC layer with batch=4)
             # [1, 4, 4] is four * four vectors (like a conv layer with batch=1)
             "numInputVectors": ("ints", False, [1]),
+            # name of the top module in verilog template. Used by PyVerilator
+            # and IPI generation
             "gen_top_module": ("s", False, ""),
+            # bias to be applied to outputs of the node
             "activation_bias": ("i", False, 0),
+            # used for IPI step
             "clkFreq": ("i", False, 200000000),
         }
         my_attrs.update(super().get_nodeattr_types())

From 3be1140fe68058c55fc1e3685609b6964ce7e993 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 17:01:05 +0000
Subject: [PATCH 053/111] [thresholding] replace min() with signed() function

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 711e3a8270..2073e95b41 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -393,7 +393,7 @@ def prepare_codegen_rtl_values(self):
 
         # Is the input datatype signed or unsigned?
         # The thresholding core needs to know this when comparing weights to inputs
-        if self.get_input_datatype().min() < 0:
+        if self.get_input_datatype().signed():
             code_gen_dict["$SIGN$"] = ["signed"]
         else:
             code_gen_dict["$SIGN$"] = ["unsigned"]

From e05effc20cd2e357f5bba38d2e320144b313c9f5 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 17:40:42 +0000
Subject: [PATCH 054/111] [thresholding] fix formatting from pre-commit

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 13 +++++++++--
 .../fpgadataflow/convert_to_hls_layers.py     |  6 ++---
 src/finn/util/basic.py                        |  1 +
 .../test_convert_to_hls_thresholding.py       | 22 ++++++++++++++-----
 ...fpgadataflow_thresholding_binary_search.py |  4 +++-
 5 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 2073e95b41..d5d5c48cce 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -38,7 +38,7 @@
 )
 
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.util.basic import get_rtlsim_trace_depth, make_build_dir, find_next_power_of_2
+from finn.util.basic import find_next_power_of_2, get_rtlsim_trace_depth, make_build_dir
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     pack_innermost_dim_as_hex_string,
@@ -671,7 +671,16 @@ def get_dynamic_config(self, model, address_stride=1):
                 )
                 config[key_name] = (
                     channel_start_addr + addr,
-                    int(str(pack_innermost_dim_as_hex_string([weight], self.get_weight_datatype(), self.get_weight_datatype().bitwidth())), 0),
+                    int(
+                        str(
+                            pack_innermost_dim_as_hex_string(
+                                [weight],
+                                self.get_weight_datatype(),
+                                self.get_weight_datatype().bitwidth(),
+                            )
+                        ),
+                        0,
+                    ),
                 )
 
                 weight_cntr += 1
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 17f839c5c5..a0461bda82 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -1167,10 +1167,10 @@ def apply(self, model):
                 else:
                     if self.use_rtl_variant:
                         warnings.warn(
-                        """%s : RTL Thresholding requested for unsupported
+                            """%s : RTL Thresholding requested for unsupported
                             configuration. Falling back to HLS implementation."""
-                        % node.name
-                    )
+                            % node.name
+                        )
 
                     # create and insert new Thresholding_Batch node
                     new_node = helper.make_node(
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 9a66cf90eb..8782bd7f8c 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -217,6 +217,7 @@ def is_exe(fpath):
 
     return None
 
+
 def find_next_power_of_2(n):
     # Negative values will loop infinitely below - return 0
     if n <= 0:
diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 3b56f40d9c..d0502a9b74 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -40,8 +40,8 @@
 from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.util.basic import gen_finn_dt_tensor
 
-from finn.core.rtlsim_exec import rtlsim_exec
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.core.rtlsim_exec import rtlsim_exec
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
@@ -199,23 +199,31 @@ def make_single_multithresholding_modelwrapper(
 @pytest.mark.parametrize("mem_mode", ["decoupled", "const"])
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
-def test_convert_to_hls_tbs_rtl_variant(activation, input_data_type, fold, num_input_channels, mem_mode):
+def test_convert_to_hls_tbs_rtl_variant(
+    activation, input_data_type, fold, num_input_channels, mem_mode
+):
     # Handle inputs to the test
     pe = generate_pe_value(fold, num_input_channels)
     num_steps = activation.get_num_possible_values() - 1
 
     # Cppsim is not supported for this node (as it is an RTL node)
     if mem_mode == "const":
-        pytest.skip("const memory mode not supported for RTL Thresholding Binary Search node")
+        pytest.skip(
+            "const memory mode not supported for RTL Thresholding Binary Search node"
+        )
     elif mem_mode != "decoupled":
         raise Exception("Unknown mem_mode: {}".format(mem_mode))
 
     if activation == DataType["BIPOLAR"]:
-        pytest.skip("Only negative activations are supported for RTL Thresholding Binary Search node")
+        pytest.skip(
+            "Only negative activations are supported for RTL Thresholding Binary Search node"
+        )
 
     # Paralellisation not supported for thresholding binary search rtl node
     if pe != 1:
-        pytest.skip("Paralellisation of IP not supported for RTL Thresholding Binary Search node")
+        pytest.skip(
+            "Paralellisation of IP not supported for RTL Thresholding Binary Search node"
+        )
 
     # Other non-input parameters
     num_input_vecs = [1, 2, 2]
@@ -313,7 +321,9 @@ def write_thresh_config(sim):
     )
 
     # Recreate the model using the ConvertToHLS transform
-    new_model = new_model.transform(to_hls.InferThresholdingLayer(mem_mode=mem_mode, use_rtl_variant=True))
+    new_model = new_model.transform(
+        to_hls.InferThresholdingLayer(mem_mode=mem_mode, use_rtl_variant=True)
+    )
     new_model = new_model.transform(InsertFIFO(True))
     new_model = new_model.transform(GiveUniqueNodeNames())
     new_model = new_model.transform(PrepareIP(test_fpga_part, target_clk_ns))
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 0be91a2569..f1a03a3a89 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -206,7 +206,9 @@ def test_fpgadataflow_thresholding_binary_search(
 
     # Paralellisation not supported for thresholding binary search rtl node
     if pe != 1:
-        pytest.skip("Paralellisation of IP not supported for RTL Thresholding Binary Search node")
+        pytest.skip(
+            "Paralellisation of IP not supported for RTL Thresholding Binary Search node"
+        )
 
     # Cppsim is not supported for this node (as it is an RTL node)
     if exec_mode == "cppsim":

From 48c33042bbc7b17f98510a8299504e4d36c3a2e8 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 17:47:16 +0000
Subject: [PATCH 055/111] [thresholding] fix more flake8 formatting

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../fpgadataflow/convert_to_hls_layers.py            |  4 ++--
 .../fpgadataflow/test_convert_to_hls_thresholding.py | 12 +++++-------
 .../test_fpgadataflow_thresholding_binary_search.py  |  5 ++---
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index a0461bda82..f6dd466fab 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -1157,7 +1157,7 @@ def apply(self, model):
                         PE=pe,
                         numSteps=thl_thres_shape[1],
                         inputDataType=idt.name,
-                        weightDataType=idt.name,  # will be set by MinimizeAccumulatorWidth
+                        weightDataType=idt.name,
                         outputDataType=odt.name,
                         numInputVectors=list(thl_in_shape[:-1]),
                         activation_bias=actval,
@@ -1183,7 +1183,7 @@ def apply(self, model):
                         PE=pe,
                         numSteps=thl_thres_shape[1],
                         inputDataType=idt.name,
-                        weightDataType=idt.name,  # will be set by MinimizeAccumulatorWidth
+                        weightDataType=idt.name,
                         outputDataType=odt.name,
                         numInputVectors=list(thl_in_shape[:-1]),
                         ActVal=actval,
diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index d0502a9b74..2785d91617 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -42,14 +42,10 @@
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 from finn.core.rtlsim_exec import rtlsim_exec
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 
 test_fpga_part = "xczu3eg-sbva484-1-e"
 target_clk_ns = 5
@@ -209,14 +205,16 @@ def test_convert_to_hls_tbs_rtl_variant(
     # Cppsim is not supported for this node (as it is an RTL node)
     if mem_mode == "const":
         pytest.skip(
-            "const memory mode not supported for RTL Thresholding Binary Search node"
+            "const memory mode not supported for " \
+            "RTL Thresholding Binary Search node"
         )
     elif mem_mode != "decoupled":
         raise Exception("Unknown mem_mode: {}".format(mem_mode))
 
     if activation == DataType["BIPOLAR"]:
         pytest.skip(
-            "Only negative activations are supported for RTL Thresholding Binary Search node"
+            "Only negative activations are supported for " \
+            "RTL Thresholding Binary Search node"
         )
 
     # Paralellisation not supported for thresholding binary search rtl node
@@ -310,7 +308,7 @@ def write_thresh_config(sim):
     y_produced = input_dict["outp"]
     assert (y_produced == y_expected).all()
 
-    #### Make a Multithreshold graph and convert to thresholding binary search node
+    # Make a Multithreshold graph and convert to thresholding binary search node
     new_model = make_single_multithresholding_modelwrapper(
         thresholds,
         pe,
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index f1a03a3a89..a4eab1e181 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -39,11 +39,9 @@
 from qonnx.util.basic import gen_finn_dt_tensor
 
 from finn.core.rtlsim_exec import rtlsim_exec
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
@@ -207,7 +205,8 @@ def test_fpgadataflow_thresholding_binary_search(
     # Paralellisation not supported for thresholding binary search rtl node
     if pe != 1:
         pytest.skip(
-            "Paralellisation of IP not supported for RTL Thresholding Binary Search node"
+            "Paralellisation of IP not supported for " \
+            "RTL Thresholding Binary Search node"
         )
 
     # Cppsim is not supported for this node (as it is an RTL node)

From 1e8a36ca3712100caeed506976a92c7e2ee4b4c4 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 17:55:20 +0000
Subject: [PATCH 056/111] [thresholding] remove backslashes for flake8

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/fpgadataflow/test_convert_to_hls_thresholding.py        | 4 ++--
 .../test_fpgadataflow_thresholding_binary_search.py           | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 2785d91617..217ee39d74 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -205,7 +205,7 @@ def test_convert_to_hls_tbs_rtl_variant(
     # Cppsim is not supported for this node (as it is an RTL node)
     if mem_mode == "const":
         pytest.skip(
-            "const memory mode not supported for " \
+            "const memory mode not supported for "
             "RTL Thresholding Binary Search node"
         )
     elif mem_mode != "decoupled":
@@ -213,7 +213,7 @@ def test_convert_to_hls_tbs_rtl_variant(
 
     if activation == DataType["BIPOLAR"]:
         pytest.skip(
-            "Only negative activations are supported for " \
+            "Only negative activations are supported for "
             "RTL Thresholding Binary Search node"
         )
 
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index a4eab1e181..049d65835f 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -205,7 +205,7 @@ def test_fpgadataflow_thresholding_binary_search(
     # Paralellisation not supported for thresholding binary search rtl node
     if pe != 1:
         pytest.skip(
-            "Paralellisation of IP not supported for " \
+            "Paralellisation of IP not supported for "
             "RTL Thresholding Binary Search node"
         )
 

From 08f1b5f49e0d5180fa739056209bc5f0a8589c7e Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 18:00:57 +0000
Subject: [PATCH 057/111] [thresholding] more flake8 fixes

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/fpgadataflow/test_convert_to_hls_thresholding.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 217ee39d74..45705dc833 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -205,8 +205,7 @@ def test_convert_to_hls_tbs_rtl_variant(
     # Cppsim is not supported for this node (as it is an RTL node)
     if mem_mode == "const":
         pytest.skip(
-            "const memory mode not supported for "
-            "RTL Thresholding Binary Search node"
+            "const memory mode not supported for this node"
         )
     elif mem_mode != "decoupled":
         raise Exception("Unknown mem_mode: {}".format(mem_mode))

From 481d773257e41ad04f2bb5e1b614decfac4312ab Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 18:02:55 +0000
Subject: [PATCH 058/111] [thresholding] undo flake8 fixes

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/fpgadataflow/test_convert_to_hls_thresholding.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 45705dc833..cee06ebec9 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -204,9 +204,7 @@ def test_convert_to_hls_tbs_rtl_variant(
 
     # Cppsim is not supported for this node (as it is an RTL node)
     if mem_mode == "const":
-        pytest.skip(
-            "const memory mode not supported for this node"
-        )
+        pytest.skip("const memory mode not supported for this node")
     elif mem_mode != "decoupled":
         raise Exception("Unknown mem_mode: {}".format(mem_mode))
 

From a51bef4e3ea906b056eb7fb3fbb114a2ae12b6aa Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 5 Jan 2023 18:04:59 +0000
Subject: [PATCH 059/111] [thresholding] another flake8 fix

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/fpgadataflow/test_convert_to_hls_thresholding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index cee06ebec9..07821983e1 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -217,7 +217,7 @@ def test_convert_to_hls_tbs_rtl_variant(
     # Paralellisation not supported for thresholding binary search rtl node
     if pe != 1:
         pytest.skip(
-            "Paralellisation of IP not supported for RTL Thresholding Binary Search node"
+            "Paralellisation not supported for RTL Thresholding Binary Search node"
         )
 
     # Other non-input parameters

From 2c313ad01465f66a9e6f367cf6552f64b6a1dab3 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 6 Jan 2023 11:11:25 +0000
Subject: [PATCH 060/111] [thresholding] remove cppsim test file generation

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../custom_op/fpgadataflow/thresholding_binary_search.py   | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index d5d5c48cce..1a5faad72a 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -239,13 +239,6 @@ def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
             np.mod(orig_thres_matrix, 1), 0
         ).all(), "Need int threshold tensor"
         ret = orig_thres_matrix
-        # workaround for vivado_hls threshold bug
-        if ret[0][0] == 0 and n_thres_steps == 1:
-            ret = np.copy(ret)
-            ret[0][0] = 1
-            warnings.warn(
-                "Setting 0-valued first threshold to 1 to avoid vivado_hls bug"
-            )
         # ensure channels = mh , duplicating if necessary
         if ret.shape[0] == 1:
             ret = np.tile(ret, (mh, 1))

From 49bdd28e4edc3d47ccb57161e073fcde2a2cb216 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 6 Jan 2023 11:14:22 +0000
Subject: [PATCH 061/111] [thresholding] remove unnecessary data generation
 functions for simulators

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 107 ------------------
 1 file changed, 107 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 1a5faad72a..7b37b2029a 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -261,84 +261,6 @@ def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
         rows between PEs is not as expected (n_thres_steps)"""
         return ret.reshape(1, pe, tmem, n_thres_steps)
 
-    def make_weight_file(self, weights, weight_file_mode, weight_file_name):
-        """Produce a file containing given weights (thresholds) in appropriate
-        format for this layer. This file can be used for either synthesis or
-        run-time reconfig of weights.
-
-        Arguments:
-        * weights : numpy array with weights to be put into the file
-        * weight_file_mode : one of {hls_header, decoupled_verilog_dat,
-          decoupled_runtime}
-        * weight_file_name : filename for the weight file to be generated
-        """
-        # There are 'decoupled_*' flavors, just make sure that the flavors
-        # are decoupled related
-        if "decoupled" not in weight_file_mode:
-            raise Exception(
-                "Unrecognized memory mode for this node: {}".format(weight_file_mode)
-            )
-
-        threshold_tensor = self.get_hls_compatible_threshold_tensor(weights)
-        tdt = self.get_weight_datatype()
-        assert np.vectorize(tdt.allowed)(
-            threshold_tensor
-        ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
-
-        # streaming thresholds need to be organized differently
-        # (1, pe, tmem, n_thres_steps) -> (1, tmem, pe, n_thres_steps)
-        decoupled_thres = np.transpose(threshold_tensor, (0, 2, 1, 3))
-        # (1, tmem, pe, n_thres_steps) -(1, tmem, pe * n_thres_steps)
-        pe = self.get_nodeattr("PE")
-        n_thres_steps = self.get_nodeattr("numSteps")
-        decoupled_thres_pe_flipped = np.flip(decoupled_thres, axis=-2)
-        decoupled_thres = decoupled_thres.reshape(1, -1, pe * n_thres_steps)
-        decoupled_thres = decoupled_thres.copy()
-        decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.reshape(
-            1, -1, pe * n_thres_steps
-        )
-        decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.copy()
-
-        if weight_file_mode == "decoupled_npy":
-            # save weight stream into npy for cppsim
-            np.save(weight_file_name, decoupled_thres)
-        elif weight_file_mode == "decoupled_verilog_dat":
-            # convert weight values into hexstring
-            weight_width = self.get_weightstream_width()
-            # pad to nearest 4 bits to get hex strings
-            weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
-            weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
-                decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix=""
-            )
-            weight_stream = weight_tensor_pe_flipped.flatten()
-            weight_stream = weight_stream.copy()
-            with open(weight_file_name, "w") as f:
-                for val in weight_stream:
-                    f.write(val + "\n")
-        elif weight_file_mode == "decoupled_runtime":
-            # memstream axi-lite interface will map each mem line to
-            # one or multiple 32-bit words
-            weight_width = self.get_weightstream_width()
-            words_per_memwidth = 2 ** ceil(log2(weight_width / 32))
-            if words_per_memwidth < 1:
-                words_per_memwidth = 1
-            weight_width_padded = words_per_memwidth * 32
-            # first, pack and ensure padding to 32 bits
-            weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
-                decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix=""
-            )
-            weight_stream = weight_tensor_pe_flipped.flatten()
-            weight_stream = weight_stream.copy()
-            with open(weight_file_name, "w") as f:
-                for val in weight_stream:
-                    # split into groups of 8 hex digits (= 32 bits)
-                    words_32b = textwrap.wrap(val, 8)
-                    words_32b.reverse()
-                    for word_32b in words_32b:
-                        f.write(word_32b + "\n")
-        else:
-            raise Exception("Decoupled weight export not yet implemented")
-
     # Get the integer from the DataType and string-ify it
     # This assumes that the data is in the form "INTx" or similar
     def conv_datatype_to_str(self, data_type):
@@ -449,35 +371,6 @@ def code_generation_ipgen(self, model, fpgapart, clk):
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
         self.set_nodeattr("ipgen_path", code_gen_dir)
         self.set_nodeattr("ip_path", code_gen_dir)
-
-        # Generate params for RTLSim
-        self.generate_params(model, code_gen_dir)
-
-    def generate_params(self, model, path):
-        code_gen_dir = path
-        weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir)
-        thresholds = model.get_initializer(self.onnx_node.input[1])
-        self.make_weight_file(thresholds, "decoupled_npy", weight_filename_sim)
-
-        # Verilog.dat thresholds:
-        # also save weights as Verilog .dat file
-        # note that we provide two different .dat files, one for synth
-        # and one for synthesis. this is because URAM-based weights always
-        # need zero weights for synthesis, otherwise they get inferred
-        # as BRAM
-        weight_filename_rtl_synth = "{}/memblock_synth_0.dat".format(code_gen_dir)
-        weight_filename_rtl_sim = "{}/memblock_sim_0.dat".format(code_gen_dir)
-        # sim weights are always the true weights
-        self.make_weight_file(
-            thresholds, "decoupled_verilog_dat", weight_filename_rtl_sim
-        )
-
-        # Synthesis thresholds:
-        synth_thresholds = thresholds
-        self.make_weight_file(
-            synth_thresholds, "decoupled_verilog_dat", weight_filename_rtl_synth
-        )
-
         return
 
     def prepare_rtlsim(self):

From e663030e98dc6c1f194ccec1d8e5d65b9599c19c Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 6 Jan 2023 11:27:46 +0000
Subject: [PATCH 062/111] [thresholding] remove potentially problematic helper
 function

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding_binary_search.py             | 20 ++++++-------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 7b37b2029a..b14eaa1669 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -261,14 +261,6 @@ def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
         rows between PEs is not as expected (n_thres_steps)"""
         return ret.reshape(1, pe, tmem, n_thres_steps)
 
-    # Get the integer from the DataType and string-ify it
-    # This assumes that the data is in the form "INTx" or similar
-    def conv_datatype_to_str(self, data_type):
-        # Handle the case that an int is passed to the function
-        if isinstance(data_type, int):
-            return str(data_type)
-        return str(DataType[data_type].bitwidth())
-
     def prepare_codegen_rtl_values(self):
         """All dictionary values produced in this function are to replace
         their key value(s) in the RTL template files"""
@@ -294,16 +286,16 @@ def prepare_codegen_rtl_values(self):
         bias = self.get_nodeattr("activation_bias")  # activation bias value
 
         code_gen_dict["$N$"] = [
-            self.conv_datatype_to_str(output_data_type)
-        ]  # output precision
+            str(DataType[output_data_type].bitwidth())
+        ]  # output precision - convert bitwidth to string
         code_gen_dict["$M$"] = [
-            self.conv_datatype_to_str(input_data_type)
-        ]  # input/threshold precision
+            str(DataType[input_data_type].bitwidth())
+        ]  # input/threshold precision - convert bitwidth to string
         code_gen_dict["$C$"] = [
-            self.conv_datatype_to_str(num_channels)
+            str(num_channels)
         ]  # number of channels
         code_gen_dict["$BIAS$"] = [
-            self.conv_datatype_to_str(bias)
+            str(bias)
         ]  # activation bias value
 
         # Is the input datatype signed or unsigned?

From 42dbf23938fdd1a302e88706302980c718a66d05 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 6 Jan 2023 11:35:36 +0000
Subject: [PATCH 063/111] [thresholding] implement flake8 formatting

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../custom_op/fpgadataflow/thresholding_binary_search.py  | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index b14eaa1669..6dc9130792 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -291,12 +291,8 @@ def prepare_codegen_rtl_values(self):
         code_gen_dict["$M$"] = [
             str(DataType[input_data_type].bitwidth())
         ]  # input/threshold precision - convert bitwidth to string
-        code_gen_dict["$C$"] = [
-            str(num_channels)
-        ]  # number of channels
-        code_gen_dict["$BIAS$"] = [
-            str(bias)
-        ]  # activation bias value
+        code_gen_dict["$C$"] = [str(num_channels)]  # number of channels
+        code_gen_dict["$BIAS$"] = [str(bias)]  # activation bias value
 
         # Is the input datatype signed or unsigned?
         # The thresholding core needs to know this when comparing weights to inputs

From 933d7476d3336a6aec9c4dea852acb25ebdf4b46 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 6 Jan 2023 11:38:11 +0000
Subject: [PATCH 064/111] [thresholding] remove unused imports

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 6dc9130792..9e1dd454f1 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -28,9 +28,7 @@
 
 import numpy as np
 import os
-import textwrap
 import warnings
-from math import ceil, log2
 from qonnx.core.datatype import DataType
 from qonnx.util.basic import (
     interleave_matrix_outer_dim_from_partitions,

From 5c6dcd9b89a7b35328676855c5c5ac13e06da90f Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 6 Jan 2023 11:40:13 +0000
Subject: [PATCH 065/111] [thresholding] remove last unused import

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 9e1dd454f1..a2e0f404b2 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -32,7 +32,6 @@
 from qonnx.core.datatype import DataType
 from qonnx.util.basic import (
     interleave_matrix_outer_dim_from_partitions,
-    roundup_to_integer_multiple,
 )
 
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp

From 51acd119eb8864ff302d0f040fcb0307c2778ccf Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 6 Jan 2023 11:42:12 +0000
Subject: [PATCH 066/111] [thresholding] reformat existing import

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index a2e0f404b2..595a643acc 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -30,9 +30,7 @@
 import os
 import warnings
 from qonnx.core.datatype import DataType
-from qonnx.util.basic import (
-    interleave_matrix_outer_dim_from_partitions,
-)
+from qonnx.util.basic import interleave_matrix_outer_dim_from_partitions
 
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.basic import find_next_power_of_2, get_rtlsim_trace_depth, make_build_dir

From b886a5ae08e608808795bc584da0650eb8ff260f Mon Sep 17 00:00:00 2001
From: auphelia <jakobapk@web.de>
Date: Wed, 18 Jan 2023 11:25:51 +0000
Subject: [PATCH 067/111] [Docs] Add bin search thresholding to docs generation

---
 docs/finn/source_code/finn.custom_op.fpgadataflow.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
index fdcf44c6d9..3627855cfb 100644
--- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
@@ -203,6 +203,14 @@ finn.custom\_op.fpgadataflow.thresholding\_batch
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.fpgadataflow.thresholding\_binary\_search
+-----------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.thresholding_binary_search
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 
 finn.custom\_op.fpgadataflow.tlastmarker
 -----------------------------------------------

From 2c3de2ab7ad12c89ee4af52e611532ff4255e258 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Mon, 23 Jan 2023 07:28:39 +0000
Subject: [PATCH 068/111] Corrected address width in Verilog wrapper for
 thresholding.

---
 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index e3f8596bc8..5068cb549c 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -51,7 +51,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	// Writing
 	input	                s_axilite_AWVALID,
 	output	                s_axilite_AWREADY,
-	input	[C_BITS+N-1:0]  s_axilite_AWADDR,
+	input	[C_BITS+N+1:0]  s_axilite_AWADDR,
 
 	input	        s_axilite_WVALID,
 	output	        s_axilite_WREADY,

From 7c9f5d8805b288a299cd1970d797af0d24327577 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 23 Jan 2023 11:57:16 +0000
Subject: [PATCH 069/111] [thresholding] remove bug affecting input width in
 top level wrapper

The C_BITS parameter calculates the correct width needed for the top-level wrapper of the thresholding binary search IP.
However, the parameter does not 'synthesize' correctly and does not update the width of the affected s_axilite_AWADDR signal.
This results in the MSBs of the input signal being truncated. These missing bits affected addressing when writing weights into the core.
Weights were written to incorrect addresses in the core, causing incorrect thresholding to occur.

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index 5068cb549c..768e7b6a5b 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -38,7 +38,6 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	parameter  C = $C$,	// Channels
 	int BIAS = $BIAS$,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
-	parameter  C_BITS = C < 2 ? 1 : $clog2(C),
 	parameter  O_BITS = BIAS > 0?
 		/* unsigned */ $clog2(2**N-BIAS) :
 		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
@@ -49,9 +48,9 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 
 	//- AXI Lite ------------------------
 	// Writing
-	input	                s_axilite_AWVALID,
-	output	                s_axilite_AWREADY,
-	input	[C_BITS+N+1:0]  s_axilite_AWADDR,
+	input	                   s_axilite_AWVALID,
+	output	                   s_axilite_AWREADY,
+	input	[$clog2(C)+N+1:0]  s_axilite_AWADDR,
 
 	input	        s_axilite_WVALID,
 	output	        s_axilite_WREADY,

From 3a0d59dd6717daedb043ea83d6873e6c663b0d06 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 23 Jan 2023 11:59:17 +0000
Subject: [PATCH 070/111] [thresholding] adjust thresholding binary search
 tests to use word addressing for programming thresholds with axilite

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/fpgadataflow/test_convert_to_hls_thresholding.py          | 2 +-
 .../test_fpgadataflow_thresholding_binary_search.py             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 07821983e1..9486513402 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -271,7 +271,7 @@ def test_convert_to_hls_tbs_rtl_variant(
     # Retrieve the axilite programming sequence for weights - for decoupled mode only
     tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
     tbs_inst = getCustomOp(tbs_node)
-    config = tbs_inst.get_dynamic_config(model)
+    config = tbs_inst.get_dynamic_config(model, 4)
 
     # Reshape generated data (not from model)
     oshape = model.get_tensor_shape("outp")
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 049d65835f..2a34971f0d 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -266,7 +266,7 @@ def test_fpgadataflow_thresholding_binary_search(
     # Retrieve the axilite programming sequence for weights - for decoupled mode only
     tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
     tbs_inst = getCustomOp(tbs_node)
-    config = tbs_inst.get_dynamic_config(model)
+    config = tbs_inst.get_dynamic_config(model, 4)
 
     # Reshape generated data (not from model)
     oshape = model.get_tensor_shape("outp")

From 757e3a1398948878e866f4fe5fe1747206a1c7d9 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 23 Jan 2023 12:05:40 +0000
Subject: [PATCH 071/111] [thresholding] adjust typo in exception

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 595a643acc..f2f9e133b2 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -421,7 +421,7 @@ def execute_node(self, context, graph):
                     reshaped_input,
                 )
             elif in_ind > 2:
-                raise Exception("Unexpected input found for Thresholding_Batch")
+                raise Exception("Unexpected input found for Thresholding_Binary_Search")
             in_ind += 1
 
         # Create a PyVerilator wrapper of the RTLSim .so

From 479575b224559680c559c7af5fd4f09582529919 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 23 Jan 2023 12:07:22 +0000
Subject: [PATCH 072/111] [thresholding] undo copyright header change - only
 needed for new files

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 0e17726d48..dc9a5a349a 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022, Advanced Micro Devices, Inc.
+# Copyright (c) 2020, Xilinx
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without

From 0d99b6c8ed358b2feea41cc8af242d40b30c8d97 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 23 Jan 2023 12:54:19 +0000
Subject: [PATCH 073/111] [thresholding] add docstring for migrated
 find_next_power_of_2() function

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/util/basic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 8782bd7f8c..ee185aa94f 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -219,6 +219,7 @@ def is_exe(fpath):
 
 
 def find_next_power_of_2(n):
+    """For any integer 'n', find the next greatest power of 2"""
     # Negative values will loop infinitely below - return 0
     if n <= 0:
         return 0

From 5a77a326558de1ecd59e61aae38575b73ac54b1b Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 23 Jan 2023 12:55:40 +0000
Subject: [PATCH 074/111] [thresholding] add docstrings for methods not in base
 class

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../fpgadataflow/thresholding_binary_search.py       | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index f2f9e133b2..7dfcd91d58 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -95,6 +95,7 @@ def get_nodeattr_types(self):
         return my_attrs
 
     def calc_tmem(self):
+        """Calculates and returns TMEM."""
         num_channels = self.get_nodeattr("NumChannels")
         pe = self.get_nodeattr("PE")
         return num_channels // pe
@@ -104,6 +105,8 @@ def make_shape_compatible_op(self, model):
         return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
+        """Used for FINN DataType inference: set the output tensors' datatypes
+        accordingly for this node"""
         node = self.onnx_node
         idt = model.get_tensor_datatype(node.input[0])
         if idt != self.get_input_datatype():
@@ -119,6 +122,8 @@ def infer_node_datatype(self, model):
         model.set_tensor_datatype(node.output[0], odt)
 
     def verify_node(self):
+        """Required by the FINN nalysis module. Checks if custom ops in graph
+        are correctly built, with all attributes and inputs."""
         return []
 
     def bram_estimation(self):
@@ -170,6 +175,7 @@ def get_outstream_width(self, ind=0):
         return o_bits * self.get_nodeattr("PE")
 
     def get_weightstream_width(self):
+        """Returns weight stream width"""
         pe = self.get_nodeattr("PE")
         wp = self.get_weight_datatype().bitwidth()
         n_thres_steps = self.get_nodeattr("numSteps")
@@ -299,20 +305,24 @@ def prepare_codegen_rtl_values(self):
         return code_gen_dict
 
     def get_rtl_file_list(self):
+        """Thresholding binary search RTL file list"""
         return ["thresholding.sv", "thresholding_axi.sv", "thresholding_axi_wrapper.v"]
 
     def get_rtl_file_paths(self):
+        """Get full path of all RTL files"""
         rtl_root_dir = os.environ["FINN_ROOT"] + "/finn-rtllib/thresholding/hdl/"
         rtl_file_list = self.get_rtl_file_list()
         rtl_file_paths = [rtl_root_dir + file for file in rtl_file_list]
         return rtl_file_paths
 
     def get_rtl_template_data(self, path):
+        """Return RTL file contents as a template"""
         with open(path, "r") as f:
             template = f.read()
         return template
 
     def fill_in_rtl_template_data(self, replace_dict, template_data):
+        """Use attribute values to finn in RTL template placeholders"""
         template_data_cp = template_data
         for key in replace_dict:
             replacement_line = "\n".join(replace_dict[key])
@@ -320,11 +330,13 @@ def fill_in_rtl_template_data(self, replace_dict, template_data):
         return template_data_cp
 
     def dump_rtl_data(self, dest_dir, filename, data):
+        """Dump filled-in-template RTL files for future synthesis step"""
         with open(os.path.join(dest_dir, filename), "w") as f:
             f.write(data)
         return
 
     def generate_hdl(self):
+        """Prepare HDL files from templates for synthesis"""
         # Generate a dictionary of values to put in RTL template
         code_gen_dict = self.prepare_codegen_rtl_values()
 

From eeed0702125de77c293a4a702f213a1035829179 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Mon, 23 Jan 2023 12:56:22 +0000
Subject: [PATCH 075/111] [thresholding] remove unused method

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 7dfcd91d58..94182b4ea0 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -209,9 +209,6 @@ def get_number_output_values(self):
     def get_exp_cycles(self):
         return 0
 
-    def get_template_param_values(self):
-        return dict()
-
     def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
         """Convert the original numpy weight matrix orig_weight_matrix into
         a form suitable for passing to the hlslib call:

From c2708686e22c9eaff18a5314c2f470fbbcb819f0 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 27 Jan 2023 14:55:16 +0000
Subject: [PATCH 076/111] [thresholding] remove 'return' at end of function -
 not needed

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/util/test_basic.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/util/test_basic.py b/tests/util/test_basic.py
index d2586f4f19..97a8c50261 100755
--- a/tests/util/test_basic.py
+++ b/tests/util/test_basic.py
@@ -58,5 +58,3 @@ def test_next_power_of_2():
         output = basic.find_next_power_of_2(test_dict["input"])
         assert output >= test_dict["input"]
         assert output == test_dict["expected_result"]
-
-    return

From af22177e50ae808072d87a9d0c5260ccb6c3a67f Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 27 Jan 2023 14:59:48 +0000
Subject: [PATCH 077/111] [thresholding] remove cppsim exec_mode from test -
 not exercised

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../test_fpgadataflow_thresholding_binary_search.py      | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index 2a34971f0d..e57c4942c8 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -191,12 +191,11 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
 @pytest.mark.parametrize("fold", [-1, 1, 2])
 @pytest.mark.parametrize("num_input_channels", [16])
-@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
 @pytest.mark.slow
 def test_fpgadataflow_thresholding_binary_search(
-    activation, input_data_type, fold, num_input_channels, exec_mode
+    activation, input_data_type, fold, num_input_channels
 ):
     # Handle inputs to the test
     pe = generate_pe_value(fold, num_input_channels)
@@ -209,12 +208,6 @@ def test_fpgadataflow_thresholding_binary_search(
             "RTL Thresholding Binary Search node"
         )
 
-    # Cppsim is not supported for this node (as it is an RTL node)
-    if exec_mode == "cppsim":
-        pytest.skip("cppsim not supported for RTL Thresholding Binary Search node")
-    elif exec_mode != "rtlsim":
-        raise Exception("Unknown exec_mode: {}".format(exec_mode))
-
     # Other non-input parameters
     num_input_vecs = [1, 2, 2]
     output_data_type = activation

From fab120b8218b2bacf8a94a23c7d250d0c5df12b6 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 27 Jan 2023 15:02:38 +0000
Subject: [PATCH 078/111] [thresholding] remove unused attributes

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 94182b4ea0..43ae8e8233 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -75,9 +75,6 @@ def get_nodeattr_types(self):
             "inputDataType": ("s", True, ""),
             "weightDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
-            # input and output FIFO depths
-            "inFIFODepth": ("i", False, 0),
-            "outFIFODepth": ("i", False, 0),
             # number of input vectors, examples:
             # [1] is a single vector (like a FC layer with batch=1)
             # [4] is four vectors (like a FC layer with batch=4)

From 5d6c964443e0c41865a18e862830e0c27a307dd1 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 27 Jan 2023 15:47:41 +0000
Subject: [PATCH 079/111] [thresholding] adjust i/o port names on thresholding
 RTL wrapper

Originally s_axis and m_axis port names on the thresholding RTL wrapper could synthesise, but did not adhere to the FINN i/o signal naming convention.
The FINN compiler would not recognise the IP being synthesised and would rely on the IP having the correct IP/signal wiring steps in place.
The FINN compiler did not recognise s_axis/m_axis signal naming and therefore did not automatically set the clock frequency of the IP to match the rest of the network.
This required a Tcl command to set the clock frequency of the IP, as well as a user-configurable attribute to set the clock frequency for ease-of-use.

It turns out that this actually reduces user ease-of-use. Having the compiler take care of the clock signalling is preferred. To do this, the s_axis/m_axis
signals are renamed to in0_V/out_V, as the compiler expects, and this extra 'user configurability' can therefore be removed.

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../hdl/thresholding_axi_wrapper.v            | 26 ++++++++++---------
 .../thresholding_binary_search.py             | 19 --------------
 2 files changed, 14 insertions(+), 31 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index 768e7b6a5b..c16bf264dd 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -43,7 +43,9 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
 )(
 	//- Global Control ------------------
+		(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
 	input	ap_clk,
+		(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
 	input	ap_rst_n,
 
 	//- AXI Lite ------------------------
@@ -72,14 +74,14 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	output	[ 1:0]  s_axilite_RRESP,
 
 	//- AXI Stream - Input --------------
-	output	s_axis_tready,
-	input	s_axis_tvalid,
-	input	[((M+7)/8)*8-1:0]  s_axis_tdata,
+	output	in0_V_TREADY,
+	input	in0_V_TVALID,
+	input	[((M+7)/8)*8-1:0]  in0_V_TDATA,
 
 	//- AXI Stream - Output -------------
-	input	m_axis_tready,
-	output	m_axis_tvalid,
-	output	[((O_BITS+7)/8)*8-1:0]  m_axis_tdata
+	input	out_V_TREADY,
+	output	out_V_TVALID,
+	output	[((O_BITS+7)/8)*8-1:0]  out_V_TDATA
 );
 
 	$MODULE_NAME_AXI$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst (
@@ -113,14 +115,14 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 		.s_axilite_RRESP(s_axilite_RRESP),
 
 		//- AXI Stream - Input --------------
-		.s_axis_tready(s_axis_tready),
-		.s_axis_tvalid(s_axis_tvalid),
-		.s_axis_tdata(s_axis_tdata),
+		.s_axis_tready(in0_V_TREADY),
+		.s_axis_tvalid(in0_V_TVALID),
+		.s_axis_tdata(in0_V_TDATA),
 
 		//- AXI Stream - Output -------------
-		.m_axis_tready(m_axis_tready),
-		.m_axis_tvalid(m_axis_tvalid),
-		.m_axis_tdata(m_axis_tdata)
+		.m_axis_tready(out_V_TREADY),
+		.m_axis_tvalid(out_V_TVALID),
+		.m_axis_tdata(out_V_TDATA)
 	);
 
 endmodule : $MODULE_NAME_AXI_WRAPPER$
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 43ae8e8233..97c9dd82c6 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -85,8 +85,6 @@ def get_nodeattr_types(self):
             "gen_top_module": ("s", False, ""),
             # bias to be applied to outputs of the node
             "activation_bias": ("i", False, 0),
-            # used for IPI step
-            "clkFreq": ("i", False, 200000000),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
@@ -477,10 +475,6 @@ def code_generation_ipi(self):
         cmd = []
         rtl_file_list = self.get_rtl_file_list()
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
-        node_name = self.onnx_node.name
-        dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0][0]
-        din_name = self.get_verilog_top_module_intf_names()["s_axis"][0][0]
-        clock_freq = self.get_nodeattr("clkFreq")
 
         for rtl_file in rtl_file_list:
             cmd.append(
@@ -493,16 +487,6 @@ def code_generation_ipi(self):
             % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)
         )
 
-        cmd.append(
-            "set_property -dict [list CONFIG.FREQ_HZ {%d}] [%s %s/%s]"
-            % (clock_freq, "get_bd_intf_pins", node_name, din_name)
-        )
-
-        cmd.append(
-            "set_property -dict [list CONFIG.FREQ_HZ {%d}] [%s %s/%s]"
-            % (clock_freq, "get_bd_intf_pins", node_name, dout_name)
-        )
-
         return cmd
 
     def get_verilog_top_module_intf_names(self):
@@ -517,9 +501,6 @@ def get_verilog_top_module_intf_names(self):
 
         intf_names = super().get_verilog_top_module_intf_names()
         intf_names["axilite"] = ["s_axilite"]
-        intf_names["s_axis"] = [["s_axis"]]
-        intf_names["m_axis"] = [["m_axis"]]
-
         return intf_names
 
     def get_dynamic_config(self, model, address_stride=1):

From bdfa6cb97096680247b6648edf20d4c519dcad16 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 31 Jan 2023 11:41:24 +0000
Subject: [PATCH 080/111] [thresholding] remove duplicated test helper function

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../test_convert_to_hls_thresholding.py       | 54 +------------------
 1 file changed, 2 insertions(+), 52 deletions(-)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 9486513402..84521b395c 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -46,6 +46,8 @@
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from test_fpgadataflow_thresholding_binary_search import make_single_thresholding_binary_search_modelwrapper
+
 
 test_fpga_part = "xczu3eg-sbva484-1-e"
 target_clk_ns = 5
@@ -86,58 +88,6 @@ def convert_np_array_to_standard_data_layout(data):
     return np.transpose(data, (0, 3, 1, 2))
 
 
-def make_single_thresholding_binary_search_modelwrapper(
-    thresholds,
-    pe,
-    input_data_type,
-    output_data_type,
-    activation_bias,
-    num_input_vecs,
-):
-    NumChannels = thresholds.shape[0]
-
-    inp = helper.make_tensor_value_info(
-        "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
-    )
-    outp = helper.make_tensor_value_info(
-        "outp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
-    )
-
-    node_inp_list = ["inp", "thresh"]
-
-    Thresholding_node = helper.make_node(
-        "Thresholding_Binary_Search",
-        node_inp_list,
-        ["outp"],
-        domain="finn.custom_op.fpgadataflow",
-        backend="fpgadataflow",
-        NumChannels=NumChannels,
-        PE=pe,
-        numSteps=thresholds.shape[1],
-        inputDataType=input_data_type.name,
-        weightDataType=input_data_type.name,
-        outputDataType=output_data_type.name,
-        numInputVectors=num_input_vecs,
-        activation_bias=activation_bias,
-    )
-    graph = helper.make_graph(
-        nodes=[Thresholding_node],
-        name="thresholding_graph",
-        inputs=[inp],
-        outputs=[outp],
-    )
-
-    model = helper.make_model(graph, producer_name="thresholding-model")
-    model = ModelWrapper(model)
-
-    model.set_tensor_datatype("inp", input_data_type)
-    model.set_tensor_datatype("outp", output_data_type)
-
-    model.set_tensor_datatype("thresh", input_data_type)
-    model.set_initializer("thresh", thresholds)
-    return model
-
-
 def make_single_multithresholding_modelwrapper(
     thresholds,
     pe,

From 6809351c5210c87a199e8b4167fa54b2dd9a48c8 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 31 Jan 2023 12:24:41 +0000
Subject: [PATCH 081/111] [thresholding] assert on finding unsupported memory
 mode for thresholding binary search HLS conversion function

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../fpgadataflow/convert_to_hls_layers.py            | 12 ++++--------
 .../fpgadataflow/test_convert_to_hls_thresholding.py | 11 ++---------
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index f6dd466fab..1a331b059f 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -1127,14 +1127,10 @@ def apply(self, model):
 
                 # Perform checks for RTL variant if chosen
                 if self.use_rtl_variant:
-                    # Check memory mode
-                    if self.mem_mode != "decoupled":
-                        warnings.warn(
-                            """%s : RTL Thresholding does not support 'decoupled' memory mode.
-                            Falling back to HLS implementation."""
-                            % node.name
-                        )
-                        is_rtl_variant_compatible = False
+                    assert self.mem_mode == "decoupled", (
+                        """%s : RTL Thresholding only supports 'decoupled' memory mode."""
+                        % node.name
+                    )
 
                     # Check PE/SIMD value
                     if pe != 1:
diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 84521b395c..d07ffd2cbf 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -142,22 +142,15 @@ def make_single_multithresholding_modelwrapper(
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
 @pytest.mark.parametrize("fold", [-1, 1, 2])
 @pytest.mark.parametrize("num_input_channels", [16])
-@pytest.mark.parametrize("mem_mode", ["decoupled", "const"])
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
 def test_convert_to_hls_tbs_rtl_variant(
-    activation, input_data_type, fold, num_input_channels, mem_mode
+    activation, input_data_type, fold, num_input_channels,
 ):
     # Handle inputs to the test
     pe = generate_pe_value(fold, num_input_channels)
     num_steps = activation.get_num_possible_values() - 1
 
-    # Cppsim is not supported for this node (as it is an RTL node)
-    if mem_mode == "const":
-        pytest.skip("const memory mode not supported for this node")
-    elif mem_mode != "decoupled":
-        raise Exception("Unknown mem_mode: {}".format(mem_mode))
-
     if activation == DataType["BIPOLAR"]:
         pytest.skip(
             "Only negative activations are supported for "
@@ -267,7 +260,7 @@ def write_thresh_config(sim):
 
     # Recreate the model using the ConvertToHLS transform
     new_model = new_model.transform(
-        to_hls.InferThresholdingLayer(mem_mode=mem_mode, use_rtl_variant=True)
+        to_hls.InferThresholdingLayer(mem_mode="decoupled", use_rtl_variant=True)
     )
     new_model = new_model.transform(InsertFIFO(True))
     new_model = new_model.transform(GiveUniqueNodeNames())

From 4515cf7c6d4e55f8dfca62b52b504e2666a6b497 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 31 Jan 2023 12:29:42 +0000
Subject: [PATCH 082/111] [thresholding] precommit fix

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/fpgadataflow/test_convert_to_hls_thresholding.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index d07ffd2cbf..75c4ef599c 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -46,7 +46,9 @@
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from test_fpgadataflow_thresholding_binary_search import make_single_thresholding_binary_search_modelwrapper
+from test_fpgadataflow_thresholding_binary_search import (
+    make_single_thresholding_binary_search_modelwrapper,
+)
 
 
 test_fpga_part = "xczu3eg-sbva484-1-e"
@@ -145,7 +147,10 @@ def make_single_multithresholding_modelwrapper(
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
 def test_convert_to_hls_tbs_rtl_variant(
-    activation, input_data_type, fold, num_input_channels,
+    activation,
+    input_data_type,
+    fold,
+    num_input_channels,
 ):
     # Handle inputs to the test
     pe = generate_pe_value(fold, num_input_channels)

From b51498ef84edcd9362f4f83270f9ae39f5d7980f Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 31 Jan 2023 12:39:39 +0000
Subject: [PATCH 083/111] [thresholding] precommit fix 2

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/fpgadataflow/test_convert_to_hls_thresholding.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 75c4ef599c..09067564eb 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -39,6 +39,9 @@
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.util.basic import gen_finn_dt_tensor
+from test_fpgadataflow_thresholding_binary_search import (
+    make_single_thresholding_binary_search_modelwrapper,
+)
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 from finn.core.rtlsim_exec import rtlsim_exec
@@ -46,10 +49,6 @@
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from test_fpgadataflow_thresholding_binary_search import (
-    make_single_thresholding_binary_search_modelwrapper,
-)
-
 
 test_fpga_part = "xczu3eg-sbva484-1-e"
 target_clk_ns = 5

From ff3b2014d5de4bf2a98c321d14bce15a9862bf74 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 31 Jan 2023 12:48:23 +0000
Subject: [PATCH 084/111] [thresholding] precommit fix 3

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/transformation/fpgadataflow/convert_to_hls_layers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 1a331b059f..1bc5fee664 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -1128,7 +1128,8 @@ def apply(self, model):
                 # Perform checks for RTL variant if chosen
                 if self.use_rtl_variant:
                     assert self.mem_mode == "decoupled", (
-                        """%s : RTL Thresholding only supports 'decoupled' memory mode."""
+                        """%s : RTL Thresholding only supports 'decoupled' memory
+                        mode."""
                         % node.name
                     )
 

From fc7e00db46414b88f3e1c3d3dc9dff4cf6bc84ff Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 23 Mar 2023 14:59:46 +0000
Subject: [PATCH 085/111] [thresholding] adjust templates so that .sv files are
 modular and can be used as standalone IP

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding.sv                | 4 ++--
 finn-rtllib/thresholding/hdl/thresholding_axi.sv            | 6 +++---
 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v     | 2 +-
 .../custom_op/fpgadataflow/thresholding_binary_search.py    | 6 +-----
 4 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index b26747d1ff..c7d5c86f6d 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -43,7 +43,7 @@
  *  threshold configuration relies on a channel address prefix. Inputs are
  *  accompanied by a channel selector.
  *****************************************************************************/
-module $MODULE_NAME$ #(
+module thresholding #(
 	int unsigned  N,  // output precision
 	int unsigned  M,  // input/threshold precision
 	int unsigned  C,  // number of channels
@@ -153,4 +153,4 @@ module $MODULE_NAME$ #(
 	assign	ocnl = pipe[N].cnl;
 	assign	odat = pipe[N].res + BIAS;
 
-endmodule : $MODULE_NAME$
+endmodule : thresholding
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 5cd7746b82..79383c7996 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -38,7 +38,7 @@
  *	- performs aligned byte address to parameter word address translation.
  *****************************************************************************/
 
-module $MODULE_NAME_AXI$ #(
+module thresholding_axi #(
 	int unsigned  N,	// output precision
 	int unsigned  M,	// input/threshold precision
 	int unsigned  C,	// Channels
@@ -197,7 +197,7 @@ module $MODULE_NAME_AXI$ #(
 	end
 
 	// Core Thresholding Module
-	$MODULE_NAME$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core (
+	thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core (
 		.clk, .rst,
 		.twe, .twa, .twd,
 		.en,
@@ -205,4 +205,4 @@ module $MODULE_NAME_AXI$ #(
 		.ovld, .ocnl(), .odat
 	);
 
-endmodule : $MODULE_NAME_AXI$
+endmodule : thresholding_axi
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index c16bf264dd..e46d0046ee 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -84,7 +84,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	output	[((O_BITS+7)/8)*8-1:0]  out_V_TDATA
 );
 
-	$MODULE_NAME_AXI$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst (
+	thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst (
 		//- Global Control ------------------
 		.ap_clk(ap_clk),
 		.ap_rst_n(ap_rst_n),
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 97c9dd82c6..9b02248185 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -259,11 +259,7 @@ def prepare_codegen_rtl_values(self):
         their key value(s) in the RTL template files"""
         code_gen_dict = {}
 
-        # Identify the module names
-        code_gen_dict["$MODULE_NAME$"] = [self.get_verilog_top_module_name()]
-        code_gen_dict["$MODULE_NAME_AXI$"] = [
-            self.get_verilog_top_module_name() + "_axi"
-        ]
+        # Identify the module name
         code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [
             self.get_verilog_top_module_name() + "_axi_wrapper"
         ]

From f530aba05b05a59c5cd05b749666f89b82706cba Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 23 Mar 2023 15:50:43 +0000
Subject: [PATCH 086/111] [thresholding]: remove SIGN template in thresholding
 RTL and create parameter instead for more modular RTL

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding.sv   | 18 +++++++++++-------
 .../thresholding/hdl/thresholding_axi.sv       |  3 ++-
 .../hdl/thresholding_axi_wrapper.v             |  3 ++-
 .../fpgadataflow/thresholding_binary_search.py |  4 ++--
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index c7d5c86f6d..deff4fe0f8 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -48,6 +48,7 @@ module thresholding #(
 	int unsigned  M,  // input/threshold precision
 	int unsigned  C,  // number of channels
 
+	bit SIGNED,	// signed inputs
 	int BIAS,  // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
 
 	int unsigned  C_BITS,
@@ -68,7 +69,7 @@ module thresholding #(
 	// Input Stream
 	input	logic  ivld,
 	input	logic        [C_BITS-1:0]  icnl,	// Ignored for C == 1
-	input	logic $SIGN$ [M     -1:0]  idat,
+	input	logic [M     -1:0]  idat,
 
 	// Output Stream
 	output	logic  ovld,
@@ -80,7 +81,7 @@ module thresholding #(
 	typedef struct packed {
 		logic                      vld;	// Valid data identification
 		logic        [C_BITS-1:0]  cnl;	// Channel
-		logic $SIGN$ [M     -1:0]  val;	// Original input value
+		logic [M     -1:0]  val;	// Original input value
 		logic        [0:N-1]       res;	// Assembling result with valid prefix [0:stage] after stage #stage
 	} pipe_t;
 	uwire pipe_t  pipe[0:N];
@@ -91,13 +92,13 @@ module thresholding #(
 	for(genvar  stage = 0; stage < N; stage++) begin : genStages
 
 		// Threshold Memory
-		uwire $SIGN$ [M-1:0]  thresh;
+		uwire [M-1:0]  thresh;
 		if(1) begin : blkUpdate
 
 			// Write control: local select from global address
 			uwire  we = twe && tws[stage];
 			if((C == 1) && (stage == 0)) begin
-				logic $SIGN$ [M-1:0]  Thresh = 'x;
+				logic [M-1:0]  Thresh = 'x;
 				always_ff @(posedge clk) begin
 					if(rst)      Thresh <= 'x;
 					else if(we)  Thresh <= twd;
@@ -105,7 +106,7 @@ module thresholding #(
 				assign  thresh = Thresh;
 			end
 			else begin
-				logic $SIGN$ [M-1:0]  Threshs[C * 2**stage];
+				logic [M-1:0]  Threshs[C * 2**stage];
 				uwire [$clog2(C)+stage-1:0]  wa = twa[$left(twa):N-stage];
 				uwire [$clog2(C)+stage-1:0]  ra;
 				if(C > 1)  assign  ra[stage+:C_BITS] = pipe[stage].cnl;
@@ -117,7 +118,7 @@ module thresholding #(
 				end
 
 				// Read
-				logic $SIGN$ [M-1:0]  RdReg;
+				logic [M-1:0]  RdReg;
 				always_ff @(posedge clk) begin
 					if(en)  RdReg <= Threshs[ra];
 				end
@@ -135,9 +136,12 @@ module thresholding #(
 
 		// Assemble pipeline data
 		logic [0:N-1]  res;
+		uwire  cmp =
+			SIGNED?      $signed(thresh) <=   $signed(State.val) :
+			/* else */ $unsigned(thresh) <= $unsigned(State.val);
 		always_comb begin
 			res        = State.res;
-			res[stage] = thresh <= State.val;	// Patch in next result bit
+			res[stage] = cmp;	// Patch in next result bit
 		end
 		assign	pipe[stage+1] = '{
 			vld: State.vld,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 79383c7996..6099a64746 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -43,6 +43,7 @@ module thresholding_axi #(
 	int unsigned  M,	// input/threshold precision
 	int unsigned  C,	// Channels
 
+	bit SIGNED,	// signed inputs
 	int BIAS,  // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
 
 	int unsigned O_BITS
@@ -197,7 +198,7 @@ module thresholding_axi #(
 	end
 
 	// Core Thresholding Module
-	thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core (
+	thresholding #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core (
 		.clk, .rst,
 		.twe, .twa, .twd,
 		.en,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index e46d0046ee..caf850b5bc 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -36,6 +36,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	parameter  N = $N$,	// output precision
 	parameter  M = $M$,	// input/threshold precision
 	parameter  C = $C$,	// Channels
+	parameter  SIGNED = $SIGNED$,	// signed inputs
 	int BIAS = $BIAS$,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 
 	parameter  O_BITS = BIAS > 0?
@@ -84,7 +85,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	output	[((O_BITS+7)/8)*8-1:0]  out_V_TDATA
 );
 
-	thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst (
+	thresholding_axi #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS), .O_BITS(O_BITS)) inst (
 		//- Global Control ------------------
 		.ap_clk(ap_clk),
 		.ap_rst_n(ap_rst_n),
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 9b02248185..af9e1173fb 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -286,9 +286,9 @@ def prepare_codegen_rtl_values(self):
         # Is the input datatype signed or unsigned?
         # The thresholding core needs to know this when comparing weights to inputs
         if self.get_input_datatype().signed():
-            code_gen_dict["$SIGN$"] = ["signed"]
+            code_gen_dict["$SIGNED$"] = [str(1)]
         else:
-            code_gen_dict["$SIGN$"] = ["unsigned"]
+            code_gen_dict["$SIGNED$"] = [str(0)]
 
         return code_gen_dict
 

From 3cd600cce8e1ff98161c55dce232d703173fa569 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 23 Mar 2023 16:20:49 +0000
Subject: [PATCH 087/111] [thresholding]: decouple thresholding core from axi
 wrapper by removing input parameters

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding.sv            | 6 ++++--
 finn-rtllib/thresholding/hdl/thresholding_axi.sv        | 8 +++++---
 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index deff4fe0f8..52d0b41b33 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -51,8 +51,10 @@ module thresholding #(
 	bit SIGNED,	// signed inputs
 	int BIAS,  // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
 
-	int unsigned  C_BITS,
-	int unsigned O_BITS
+	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
+	localparam int unsigned  O_BITS = BIAS >= 0?
+		/* unsigned */ $clog2(2**N+BIAS) :
+		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
 )(
 	// Global Control
 	input	logic  clk,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 6099a64746..4bb3add13b 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -46,7 +46,10 @@ module thresholding_axi #(
 	bit SIGNED,	// signed inputs
 	int BIAS,  // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
 
-	int unsigned O_BITS
+	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
+	localparam int unsigned  O_BITS = BIAS >= 0?
+		/* unsigned */ $clog2(2**N+BIAS) :
+		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
 )(
 	//- Global Control ------------------
 	input	logic  ap_clk,
@@ -173,7 +176,6 @@ module thresholding_axi #(
 
 	end : blkOutputDecouple
 
-	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C);
 	uwire  ivld = s_axis_tvalid;
 	uwire [C_BITS-1:0]  icnl;
 	uwire [M     -1:0]  idat = s_axis_tdata[M-1:0];
@@ -198,7 +200,7 @@ module thresholding_axi #(
 	end
 
 	// Core Thresholding Module
-	thresholding #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core (
+	thresholding #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS)) core (
 		.clk, .rst,
 		.twe, .twa, .twd,
 		.en,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index caf850b5bc..da013b667a 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -85,7 +85,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	output	[((O_BITS+7)/8)*8-1:0]  out_V_TDATA
 );
 
-	thresholding_axi #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS), .O_BITS(O_BITS)) inst (
+	thresholding_axi #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS)) inst (
 		//- Global Control ------------------
 		.ap_clk(ap_clk),
 		.ap_rst_n(ap_rst_n),

From 54afa637d2b7beac8beca99979e2d727385b90f3 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Tue, 28 Mar 2023 17:17:13 +0100
Subject: [PATCH 088/111] [thresholding]: patch in PE value to the thresholding
 AXI module and wrapper

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../thresholding/hdl/thresholding_axi.sv      | 117 ++++++++++++------
 .../hdl/thresholding_axi_wrapper.v            |   7 +-
 .../thresholding_binary_search.py             |   2 +
 3 files changed, 82 insertions(+), 44 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 4bb3add13b..506e31b215 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -42,11 +42,14 @@ module thresholding_axi #(
 	int unsigned  N,	// output precision
 	int unsigned  M,	// input/threshold precision
 	int unsigned  C,	// Channels
+	int unsigned  PE,	// Processing Parallelism, requires C = M*PE
 
 	bit SIGNED,	// signed inputs
 	int BIAS,  // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
 
-	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
+    localparam int unsigned  CF = 1 + (C-1)/PE,	// Channel Fold
+	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
+	localparam int unsigned  C_BITS = C/PE < 2? 1 : $clog2(C/PE),
 	localparam int unsigned  O_BITS = BIAS >= 0?
 		/* unsigned */ $clog2(2**N+BIAS) :
 		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
@@ -57,9 +60,9 @@ module thresholding_axi #(
 
 	//- AXI Lite ------------------------
 	// Writing
-	input	logic                    s_axilite_AWVALID,
-	output	logic                    s_axilite_AWREADY,
-	input	logic [$clog2(C)+N+1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
+	input	logic                  s_axilite_AWVALID,
+	output	logic                  s_axilite_AWREADY,
+	input	logic [ADDR_BITS-1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
 
 	input	logic         s_axilite_WVALID,
 	output	logic         s_axilite_WREADY,
@@ -83,33 +86,53 @@ module thresholding_axi #(
 	//- AXI Stream - Input --------------
 	output	logic  s_axis_tready,
 	input	logic  s_axis_tvalid,
-	input	logic [((M+7)/8)*8-1:0]  s_axis_tdata,
+	input	logic [((PE*M+7)/8)*8-1:0]  s_axis_tdata,
 
 	//- AXI Stream - Output -------------
 	input	logic  m_axis_tready,
 	output	logic  m_axis_tvalid,
-	output	logic [((O_BITS+7)/8)*8-1:0]  m_axis_tdata
+	output	logic [((PE*O_BITS+7)/8)*8-1:0]  m_axis_tdata
 );
+	//- Parameter Constraints Checking --------------------------------------
+	initial begin
+		if(C%PE != 0) begin
+			$error("%m: Channel count C=%0d is not a multiple of PE=%0d.", C, PE);
+			$finish;
+		end
+	end
+
 	//- Global Control ------------------------------------------------------
 	uwire  clk = ap_clk;
 	uwire  rst = !ap_rst_n;
 
 	//- AXI Lite: Threshold Configuration -----------------------------------
-	uwire  twe;
-	uwire [$clog2(C)+N-1:0]  twa;
-	uwire [          M-1:0]  twd;
+	uwire  twe[PE];
+	uwire [$clog2(CF)+N-1:0]  twa;
+	uwire [           M-1:0]  twd;
 	if(1) begin : blkAxiLite
 		logic  WABusy = 0;
 		logic  WDBusy = 0;
-		logic [$clog2(C)+N-1:0]  Addr = 'x;
-		logic [          M-1:0]  Data = 'x;
+		logic  Sel[PE] = '{ default: 'x };
+		logic [$clog2(CF)+N-1:0]  Addr = 'x;
+		logic [           M-1:0]  Data = 'x;
 
-		assign	twe = WABusy && WDBusy;
+		for(genvar  pe = 0; pe < PE; pe++) begin
+			assign	twe[pe] = WABusy && WDBusy && Sel[pe];
+		end
 		assign	twa = Addr;
 		assign	twd = Data;
 
-		uwire  clr_wr = rst || (twe && s_axilite_BREADY);
-		always_ff @(posedge clk) begin : blockName
+		if(PE == 1)  always_comb  Sel[0] = 1;
+		else begin
+			always_ff @(posedge clk) begin
+				if(!WABusy) begin
+					foreach(Sel[pe])  Sel[pe] <= s_axilite_AWADDR[N+2+:$clog2(PE)] == pe;
+				end
+			end
+		end
+
+		uwire  clr_wr = rst || (WABusy && WDBusy && s_axilite_BREADY);
+		always_ff @(posedge clk) begin
 			if(clr_wr) begin
 				WABusy <= 0;
 				Addr <= 'x;
@@ -119,7 +142,8 @@ module thresholding_axi #(
 			else begin
 				if(!WABusy) begin
 					WABusy <= s_axilite_AWVALID;
-					Addr   <= s_axilite_AWADDR[$clog2(C)+N+1:2];
+					Addr[0+:N] <= s_axilite_AWADDR[2+:N];
+					if(C > 1)  Addr[N+:$clog2(CF)] <= s_axilite_AWADDR[2+N+$clog2(PE)+:$clog2(CF)];
 				end
 				if(!WDBusy) begin
 					WDBusy <= s_axilite_WVALID;
@@ -148,39 +172,48 @@ module thresholding_axi #(
 
 	//- IO-Sandwich with two-stage output buffer for containing a local enable
 	uwire  en;
-	uwire [O_BITS-1:0]  odat;
-	uwire  ovld;
+	uwire [PE-1:0][O_BITS-1:0]  odat;
+	uwire  ovld[PE];
 	if(1) begin : blkOutputDecouple
 		typedef struct {
-			logic          vld;
-			logic [O_BITS-1:0]  dat;
+			logic  vld;
+			logic [PE-1:0][O_BITS-1:0]  dat;
 		} buf_t;
-		buf_t  Buf[2] = '{ default: '{ vld: 0, dat: 'x } };
+		buf_t  A = '{ vld: 0, dat: 'x };
+		buf_t  B = '{ vld: 0, dat: 'x };
 		always_ff @(posedge clk) begin
-			if(rst)  Buf <= '{ default: '{ vld: 0, dat: 'x } };
+			if(rst) begin
+				A <= '{ vld: 0, dat: 'x };
+				B <= '{ vld: 0, dat: 'x };
+			end
 			else begin
-				if(!Buf[1].vld || m_axis_tready) begin
-					Buf[1] <= '{
-						vld: Buf[0].vld || ovld,
-						dat: Buf[0].vld? Buf[0].dat : odat
+				if(!B.vld || m_axis_tready) begin
+					B <= '{
+						vld: A.vld || ovld[0],
+						dat: A.vld? A.dat : odat
 					};
 				end
-				Buf[0].vld <= Buf[1].vld && !m_axis_tready && (Buf[0].vld || ovld);
-				if(!Buf[0].vld)  Buf[0].dat <= odat;
+				A.vld <= B.vld && !m_axis_tready && (A.vld || ovld[0]);
+				if(!A.vld)  A.dat <= odat;
 			end
 		end
-		assign	en = !Buf[0].vld;
+		assign	en = !A.vld;
 
-		assign	m_axis_tvalid = Buf[1].vld;
-		assign	m_axis_tdata  = Buf[1].dat;
+		assign	m_axis_tvalid = B.vld;
+		assign	m_axis_tdata  = B.dat;
 
 	end : blkOutputDecouple
 
+	// localparam int unsigned  C_BITS = C/PE < 2? 1 : $clog2(C/PE);
 	uwire  ivld = s_axis_tvalid;
 	uwire [C_BITS-1:0]  icnl;
-	uwire [M     -1:0]  idat = s_axis_tdata[M-1:0];
+	uwire [M     -1:0]  idat[PE];
+	for(genvar  pe = 0; pe < PE; pe++) begin
+		assign	idat[pe] = s_axis_tdata[pe*M+:M];
+	end
+
 	assign	s_axis_tready = en;
-	if(C == 1)  assign  icnl = 'x;
+	if(C == PE)  assign  icnl = 'x;
 	else begin
 		logic [C_BITS-1:0]  Chnl = 0;
 		logic               Last = 0;
@@ -193,19 +226,21 @@ module thresholding_axi #(
 			end
 			else if(inc) begin
 				Chnl <= Chnl + 1;
-				Last <= (~Chnl & (C-2)) == 0;
+				Last <= (~Chnl & (C/PE-2)) == 0;
 			end
 		end
 		assign	icnl = Chnl;
 	end
 
-	// Core Thresholding Module
-	thresholding #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS)) core (
-		.clk, .rst,
-		.twe, .twa, .twd,
-		.en,
-		.ivld, .icnl, .idat,
-		.ovld, .ocnl(), .odat
-	);
+	// Core Thresholding Modules
+	for(genvar  pe = 0; pe < PE; pe++) begin : genCores
+		thresholding #(.N(N), .M(M), .C(C/PE), .SIGNED(SIGNED), .BIAS(BIAS)) core (
+			.clk, .rst,
+			.twe(twe[pe]), .twa, .twd,
+			.en,
+			.ivld, .icnl, .idat(idat[pe]),
+			.ovld(ovld[pe]), .ocnl(), .odat(odat[pe])
+		);
+	end : genCores
 
 endmodule : thresholding_axi
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index da013b667a..c27480f388 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -38,6 +38,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	parameter  C = $C$,	// Channels
 	parameter  SIGNED = $SIGNED$,	// signed inputs
 	int BIAS = $BIAS$,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
+	parameter  PE = $PE$,
 
 	parameter  O_BITS = BIAS > 0?
 		/* unsigned */ $clog2(2**N-BIAS) :
@@ -77,15 +78,15 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	//- AXI Stream - Input --------------
 	output	in0_V_TREADY,
 	input	in0_V_TVALID,
-	input	[((M+7)/8)*8-1:0]  in0_V_TDATA,
+	input	[((PE*M+7)/8)*8-1:0]  in0_V_TDATA,
 
 	//- AXI Stream - Output -------------
 	input	out_V_TREADY,
 	output	out_V_TVALID,
-	output	[((O_BITS+7)/8)*8-1:0]  out_V_TDATA
+	output	[((PE*O_BITS+7)/8)*8-1:0]  out_V_TDATA
 );
 
-	thresholding_axi #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS)) inst (
+	thresholding_axi #(.N(N), .M(M), .C(C), .PE(PE), .SIGNED(SIGNED), .BIAS(BIAS)) inst (
 		//- Global Control ------------------
 		.ap_clk(ap_clk),
 		.ap_rst_n(ap_rst_n),
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index af9e1173fb..e2453fcaad 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -273,6 +273,7 @@ def prepare_codegen_rtl_values(self):
         )  # input/threshold precision
         num_channels = self.get_nodeattr("NumChannels")  # number of channels
         bias = self.get_nodeattr("activation_bias")  # activation bias value
+        pe = self.get_nodeattr("PE")
 
         code_gen_dict["$N$"] = [
             str(DataType[output_data_type].bitwidth())
@@ -282,6 +283,7 @@ def prepare_codegen_rtl_values(self):
         ]  # input/threshold precision - convert bitwidth to string
         code_gen_dict["$C$"] = [str(num_channels)]  # number of channels
         code_gen_dict["$BIAS$"] = [str(bias)]  # activation bias value
+        code_gen_dict["$PE$"] = [str(pe)] # requires C = M*PE
 
         # Is the input datatype signed or unsigned?
         # The thresholding core needs to know this when comparing weights to inputs

From 29f9e1ce637d5df00cb1dae2ada3438070da0852 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Thu, 30 Mar 2023 15:43:36 +0100
Subject: [PATCH 089/111] [thresholding]: remove reset that erases the 0th
 stage threshold value

There is a corner case when the thresholding core is configured for a single channel (C == 1): the stage 0 threshold is then held in a dedicated register, and that register was invalidated ('x) on reset.
For every other stage in this case (and for all stages in all other cases) the threshold parameters survive a reset. When a reset happened after programming the threshold parameters, all of them stayed intact except for the 0th stage threshold value.

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding.sv | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 52d0b41b33..0ce95ed3f9 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -102,8 +102,7 @@ module thresholding #(
 			if((C == 1) && (stage == 0)) begin
 				logic [M-1:0]  Thresh = 'x;
 				always_ff @(posedge clk) begin
-					if(rst)      Thresh <= 'x;
-					else if(we)  Thresh <= twd;
+					if(we)  Thresh <= twd;
 				end
 				assign  thresh = Thresh;
 			end

From 2c4c8e224f8921848713f6d121532ff345c84fd0 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 31 Mar 2023 10:43:00 +0100
Subject: [PATCH 090/111] [thresholding]: enable PE testing of RTL thresholding
 binary search node

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 .../fpgadataflow/convert_to_hls_layers.py             | 10 ----------
 .../fpgadataflow/test_convert_to_hls_thresholding.py  | 10 ++--------
 .../test_fpgadataflow_thresholding_binary_search.py   | 11 ++---------
 3 files changed, 4 insertions(+), 27 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index dedcc30a38..4c06a28b75 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -1137,16 +1137,6 @@ def apply(self, model):
                         % node.name
                     )
 
-                    # Check PE/SIMD value
-                    if pe != 1:
-                        warnings.warn(
-                            """%s : RTL Thresholding does not support paralellisation.
-                            Only a PE value of 1 is supported.
-                            Falling back to HLS implementation."""
-                            % node.name
-                        )
-                        is_rtl_variant_compatible = False
-
                 if self.use_rtl_variant and is_rtl_variant_compatible:
                     new_node = helper.make_node(
                         "Thresholding_Binary_Search",
diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 09067564eb..895c82d4ca 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -138,10 +138,10 @@ def make_single_multithresholding_modelwrapper(
     model.set_initializer("thresh", thresholds)
     return model
 
-
+# N.B. Fold values where C % PE != 0 fail
 @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
-@pytest.mark.parametrize("fold", [-1, 1, 2])
+@pytest.mark.parametrize("fold", [-1, 1, 2, 4, 6])
 @pytest.mark.parametrize("num_input_channels", [16])
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
@@ -161,12 +161,6 @@ def test_convert_to_hls_tbs_rtl_variant(
             "RTL Thresholding Binary Search node"
         )
 
-    # Paralellisation not supported for thresholding binary search rtl node
-    if pe != 1:
-        pytest.skip(
-            "Paralellisation not supported for RTL Thresholding Binary Search node"
-        )
-
     # Other non-input parameters
     num_input_vecs = [1, 2, 2]
     output_data_type = activation
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
index e57c4942c8..24b60f5ea5 100755
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -186,10 +186,10 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
 
 # Test brief: Create a Thresholding binary search layer using various parameters
 # and test against a SW generated & simulated dataset
-# N.B. - fold factor of '-1' is supported only (no PE/SIMD support)
+# N.B. Fold values where C % PE != 0 fail
 @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
-@pytest.mark.parametrize("fold", [-1, 1, 2])
+@pytest.mark.parametrize("fold", [-1, 1, 2, 4, 6])
 @pytest.mark.parametrize("num_input_channels", [16])
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
@@ -201,13 +201,6 @@ def test_fpgadataflow_thresholding_binary_search(
     pe = generate_pe_value(fold, num_input_channels)
     num_steps = activation.get_num_possible_values() - 1
 
-    # Paralellisation not supported for thresholding binary search rtl node
-    if pe != 1:
-        pytest.skip(
-            "Paralellisation of IP not supported for "
-            "RTL Thresholding Binary Search node"
-        )
-
     # Other non-input parameters
     num_input_vecs = [1, 2, 2]
     output_data_type = activation

From 5d07a435c2994f0238fb41ec21381d75ea049796 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 31 Mar 2023 10:45:34 +0100
Subject: [PATCH 091/111] [thresholding]: add comment about why bipolar
 activations skipped for thresholding binary search node

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 tests/fpgadataflow/test_convert_to_hls_thresholding.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index 895c82d4ca..f2d76c8416 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -155,6 +155,9 @@ def test_convert_to_hls_tbs_rtl_variant(
     pe = generate_pe_value(fold, num_input_channels)
     num_steps = activation.get_num_possible_values() - 1
 
+    # See convert_to_hls_layers::InferThresholdingLayer:
+    # assert (not odt.signed()) or (actval < 0)
+    # This implies that it expects a negative activation, BIPOLAR does not provide that
     if activation == DataType["BIPOLAR"]:
         pytest.skip(
             "Only negative activations are supported for "

From fcf579ce01075bbeb997580fbafc8cd9d64ed50c Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Fri, 31 Mar 2023 11:32:42 +0100
Subject: [PATCH 092/111] fix precommit issues

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 +-
 tests/fpgadataflow/test_convert_to_hls_thresholding.py        | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index e2453fcaad..694d25bfaa 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -283,7 +283,7 @@ def prepare_codegen_rtl_values(self):
         ]  # input/threshold precision - convert bitwidth to string
         code_gen_dict["$C$"] = [str(num_channels)]  # number of channels
         code_gen_dict["$BIAS$"] = [str(bias)]  # activation bias value
-        code_gen_dict["$PE$"] = [str(pe)] # requires C = M*PE
+        code_gen_dict["$PE$"] = [str(pe)]  # requires C = M*PE
 
         # Is the input datatype signed or unsigned?
         # The thresholding core needs to know this when comparing weights to inputs
diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
index f2d76c8416..9c233bdd06 100755
--- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -138,6 +138,7 @@ def make_single_multithresholding_modelwrapper(
     model.set_initializer("thresh", thresholds)
     return model
 
+
 # N.B. Fold values where C % PE != 0 fail
 @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
 @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])

From 6c9d1f50177de5bb1c91eacc061d0aa8adb9cf56 Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 5 Apr 2023 16:28:55 +0100
Subject: [PATCH 093/111] [thresholding] only adjust MSB thresholding
 addressing bits when channel fold factor is present

In the case where channel folding is not present (i.e. CF is 1, so $clog2(CF) is 0), we saw incorrect threshold address programming.
Without this commit and when no channel folding is present, this if statement is still stepped through whenever C > 1 and
was damaging LSBs of the Addr signal, causing incorrect threshold address programming for a PE core.

Although the logic in the if statement looks correct (i.e. programming 0 bits (clog2(CF) => 0)) and should not
harm the Addr signal, it's best to avoid stepping through a case that does not exist (i.e., there is no channel
folding and each channel has its own PE; therefore no extra bits needed to program multiple channel thresholds
into a single PE core).

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 finn-rtllib/thresholding/hdl/thresholding_axi.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 506e31b215..d2a7420a99 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -143,7 +143,7 @@ module thresholding_axi #(
 				if(!WABusy) begin
 					WABusy <= s_axilite_AWVALID;
 					Addr[0+:N] <= s_axilite_AWADDR[2+:N];
-					if(C > 1)  Addr[N+:$clog2(CF)] <= s_axilite_AWADDR[2+N+$clog2(PE)+:$clog2(CF)];
+					if(CF > 1)  Addr[N+:$clog2(CF)] <= s_axilite_AWADDR[2+N+$clog2(PE)+:$clog2(CF)];
 				end
 				if(!WDBusy) begin
 					WDBusy <= s_axilite_WVALID;

From b247ffbc258ec628a51c14822ec4343283ef5a2e Mon Sep 17 00:00:00 2001
From: Fionn O'Donohoe <fionno@xilinx.com>
Date: Wed, 5 Apr 2023 19:44:36 +0100
Subject: [PATCH 094/111] [thresholding] update binary search to match qonnx
 0.2.0

commit 65822357a7dba4f917c852d5f08bdebc7dd22e9d on dev moved all custom_ops
to be compatible with qonnx 0.2.0

Signed-off-by: Fionn O'Donohoe <fionno@xilinx.com>
---
 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
index 694d25bfaa..d02b778823 100755
--- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -60,8 +60,8 @@
 class Thresholding_Binary_Search(HLSCustomOp):
     """Class that corresponds to finn-rtllib 'thresholding' function."""
 
-    def __init__(self, onnx_node):
-        super().__init__(onnx_node)
+    def __init__(self, onnx_node, **kwargs):
+        super().__init__(onnx_node, **kwargs)
 
     def get_nodeattr_types(self):
         my_attrs = {

From 7be5ce412e5747f17fe0062769cd2cc476b5bfa4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Mon, 17 Apr 2023 07:53:44 +0100
Subject: [PATCH 095/111] Defaulting BIAS and SIGNED parameters. Renaming M to
 K avoiding naming collision with uniform option.

---
 finn-rtllib/thresholding/hdl/thresholding.sv  | 28 +++++++++----------
 .../thresholding/hdl/thresholding_axi.sv      | 27 +++++++++---------
 .../hdl/thresholding_axi_wrapper.v            | 18 ++++++------
 3 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 0ce95ed3f9..d16a9219d7 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -45,11 +45,11 @@
  *****************************************************************************/
 module thresholding #(
 	int unsigned  N,  // output precision
-	int unsigned  M,  // input/threshold precision
+	int unsigned  K,  // input/threshold precision
 	int unsigned  C,  // number of channels
 
-	bit SIGNED,	// signed inputs
-	int BIAS,  // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
+	bit  SIGNED = 1,  // signed inputs
+	int  BIAS   = 0,  // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
 	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
 	localparam int unsigned  O_BITS = BIAS >= 0?
@@ -63,15 +63,15 @@ module thresholding #(
 	// Threshold Configuration
 	input	logic  twe,
 	input	logic [$clog2(C)+N-1:0]  twa,
-	input	logic [          M-1:0]  twd,
+	input	logic [          K-1:0]  twd,
 
 	// Clock Enable for Stream Processing
 	input	logic  en,
 
 	// Input Stream
 	input	logic  ivld,
-	input	logic        [C_BITS-1:0]  icnl,	// Ignored for C == 1
-	input	logic [M     -1:0]  idat,
+	input	logic [C_BITS-1:0]  icnl,	// Ignored for C == 1
+	input	logic [K     -1:0]  idat,
 
 	// Output Stream
 	output	logic  ovld,
@@ -81,10 +81,10 @@ module thresholding #(
 
 	// Pipeline Links & Feed
 	typedef struct packed {
-		logic                      vld;	// Valid data identification
-		logic        [C_BITS-1:0]  cnl;	// Channel
-		logic [M     -1:0]  val;	// Original input value
-		logic        [0:N-1]       res;	// Assembling result with valid prefix [0:stage] after stage #stage
+		logic               vld;	// Valid data identification
+		logic [C_BITS-1:0]  cnl;	// Channel
+		logic [K     -1:0]  val;	// Original input value
+		logic [0:N-1]       res;	// Assembling result with valid prefix [0:stage] after stage #stage
 	} pipe_t;
 	uwire pipe_t  pipe[0:N];
 	assign	pipe[0] = pipe_t'{ vld: ivld, cnl: icnl, val: idat, res: {N{1'bx}} };	// Feed original input
@@ -94,20 +94,20 @@ module thresholding #(
 	for(genvar  stage = 0; stage < N; stage++) begin : genStages
 
 		// Threshold Memory
-		uwire [M-1:0]  thresh;
+		uwire [K-1:0]  thresh;
 		if(1) begin : blkUpdate
 
 			// Write control: local select from global address
 			uwire  we = twe && tws[stage];
 			if((C == 1) && (stage == 0)) begin
-				logic [M-1:0]  Thresh = 'x;
+				logic [K-1:0]  Thresh = 'x;
 				always_ff @(posedge clk) begin
 					if(we)  Thresh <= twd;
 				end
 				assign  thresh = Thresh;
 			end
 			else begin
-				logic [M-1:0]  Threshs[C * 2**stage];
+				logic [K-1:0]  Threshs[C * 2**stage];
 				uwire [$clog2(C)+stage-1:0]  wa = twa[$left(twa):N-stage];
 				uwire [$clog2(C)+stage-1:0]  ra;
 				if(C > 1)  assign  ra[stage+:C_BITS] = pipe[stage].cnl;
@@ -119,7 +119,7 @@ module thresholding #(
 				end
 
 				// Read
-				logic [M-1:0]  RdReg;
+				logic [K-1:0]  RdReg;
 				always_ff @(posedge clk) begin
 					if(en)  RdReg <= Threshs[ra];
 				end
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index d2a7420a99..2f0393a3e7 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -40,16 +40,15 @@
 
 module thresholding_axi #(
 	int unsigned  N,	// output precision
-	int unsigned  M,	// input/threshold precision
+	int unsigned  K,	// input/threshold precision
 	int unsigned  C,	// Channels
-	int unsigned  PE,	// Processing Parallelism, requires C = M*PE
+	int unsigned  PE,	// Processing Parallelism, requires C = k*PE
 
-	bit SIGNED,	// signed inputs
-	int BIAS,  // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
+	bit  SIGNED = 1,	// signed inputs
+	int  BIAS   = 0,	// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
-    localparam int unsigned  CF = 1 + (C-1)/PE,	// Channel Fold
+	localparam int unsigned  CF = 1 + (C-1)/PE,	// Channel Fold
 	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
-	localparam int unsigned  C_BITS = C/PE < 2? 1 : $clog2(C/PE),
 	localparam int unsigned  O_BITS = BIAS >= 0?
 		/* unsigned */ $clog2(2**N+BIAS) :
 		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
@@ -86,7 +85,7 @@ module thresholding_axi #(
 	//- AXI Stream - Input --------------
 	output	logic  s_axis_tready,
 	input	logic  s_axis_tvalid,
-	input	logic [((PE*M+7)/8)*8-1:0]  s_axis_tdata,
+	input	logic [((PE*K+7)/8)*8-1:0]  s_axis_tdata,
 
 	//- AXI Stream - Output -------------
 	input	logic  m_axis_tready,
@@ -108,13 +107,13 @@ module thresholding_axi #(
 	//- AXI Lite: Threshold Configuration -----------------------------------
 	uwire  twe[PE];
 	uwire [$clog2(CF)+N-1:0]  twa;
-	uwire [           M-1:0]  twd;
+	uwire [           K-1:0]  twd;
 	if(1) begin : blkAxiLite
 		logic  WABusy = 0;
 		logic  WDBusy = 0;
 		logic  Sel[PE] = '{ default: 'x };
 		logic [$clog2(CF)+N-1:0]  Addr = 'x;
-		logic [           M-1:0]  Data = 'x;
+		logic [           K-1:0]  Data = 'x;
 
 		for(genvar  pe = 0; pe < PE; pe++) begin
 			assign	twe[pe] = WABusy && WDBusy && Sel[pe];
@@ -147,7 +146,7 @@ module thresholding_axi #(
 				end
 				if(!WDBusy) begin
 					WDBusy <= s_axilite_WVALID;
-					Data   <= s_axilite_WDATA[M-1:0];
+					Data   <= s_axilite_WDATA[K-1:0];
 				end
 			end
 		end
@@ -204,12 +203,12 @@ module thresholding_axi #(
 
 	end : blkOutputDecouple
 
-	// localparam int unsigned  C_BITS = C/PE < 2? 1 : $clog2(C/PE);
+	localparam int unsigned  C_BITS = C/PE < 2? 1 : $clog2(C/PE);
 	uwire  ivld = s_axis_tvalid;
 	uwire [C_BITS-1:0]  icnl;
-	uwire [M     -1:0]  idat[PE];
+	uwire [K     -1:0]  idat[PE];
 	for(genvar  pe = 0; pe < PE; pe++) begin
-		assign	idat[pe] = s_axis_tdata[pe*M+:M];
+		assign	idat[pe] = s_axis_tdata[pe*K+:K];
 	end
 
 	assign	s_axis_tready = en;
@@ -234,7 +233,7 @@ module thresholding_axi #(
 
 	// Core Thresholding Modules
 	for(genvar  pe = 0; pe < PE; pe++) begin : genCores
-		thresholding #(.N(N), .M(M), .C(C/PE), .SIGNED(SIGNED), .BIAS(BIAS)) core (
+		thresholding #(.N(N), .K(K), .C(C/PE), .SIGNED(SIGNED), .BIAS(BIAS)) core (
 			.clk, .rst,
 			.twe(twe[pe]), .twa, .twd,
 			.en,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
index c27480f388..2657b39d98 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -34,20 +34,20 @@
 
 module $MODULE_NAME_AXI_WRAPPER$ #(
 	parameter  N = $N$,	// output precision
-	parameter  M = $M$,	// input/threshold precision
+	parameter  K = $M$,	// input/threshold precision
 	parameter  C = $C$,	// Channels
-	parameter  SIGNED = $SIGNED$,	// signed inputs
-	int BIAS = $BIAS$,  // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS)
 	parameter  PE = $PE$,
+	parameter  SIGNED = $SIGNED$,	// signed inputs
+	parameter  BIAS = $BIAS$,		// offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
 
 	parameter  O_BITS = BIAS > 0?
-		/* unsigned */ $clog2(2**N-BIAS) :
-		/* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS)
+		/* unsigned */ $clog2(2**N+BIAS) :
+		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
 )(
 	//- Global Control ------------------
-		(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
+	(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
 	input	ap_clk,
-		(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
+	(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
 	input	ap_rst_n,
 
 	//- AXI Lite ------------------------
@@ -78,7 +78,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	//- AXI Stream - Input --------------
 	output	in0_V_TREADY,
 	input	in0_V_TVALID,
-	input	[((PE*M+7)/8)*8-1:0]  in0_V_TDATA,
+	input	[((PE*K+7)/8)*8-1:0]  in0_V_TDATA,
 
 	//- AXI Stream - Output -------------
 	input	out_V_TREADY,
@@ -86,7 +86,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #(
 	output	[((PE*O_BITS+7)/8)*8-1:0]  out_V_TDATA
 );
 
-	thresholding_axi #(.N(N), .M(M), .C(C), .PE(PE), .SIGNED(SIGNED), .BIAS(BIAS)) inst (
+	thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .BIAS(BIAS)) inst (
 		//- Global Control ------------------
 		.ap_clk(ap_clk),
 		.ap_rst_n(ap_rst_n),

From a0120f2501eddd6b71f7aff36f1f3092e229346d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Thu, 4 May 2023 09:58:42 +0100
Subject: [PATCH 096/111] Thresholding rework to support parameter readback.

---
 finn-rtllib/thresholding/hdl/axilite_if.v     | 210 ++++++++++++
 finn-rtllib/thresholding/hdl/thresholding.sv  | 283 ++++++++++++----
 .../thresholding/hdl/thresholding_axi.sv      | 200 +++--------
 .../hdl/thresholding_axi_wrapper.v            | 130 --------
 finn-rtllib/thresholding/sim/thresholding.tcl |  17 +
 .../thresholding/sim/thresholding_axi_tb.sv   | 314 ++++++++++++++++++
 .../thresholding/sim/thresholding_tb.sv       | 272 +++++++++++++++
 7 files changed, 1067 insertions(+), 359 deletions(-)
 create mode 100644 finn-rtllib/thresholding/hdl/axilite_if.v
 delete mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
 create mode 100644 finn-rtllib/thresholding/sim/thresholding.tcl
 create mode 100644 finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
 create mode 100644 finn-rtllib/thresholding/sim/thresholding_tb.sv

diff --git a/finn-rtllib/thresholding/hdl/axilite_if.v b/finn-rtllib/thresholding/hdl/axilite_if.v
new file mode 100644
index 0000000000..bdd4de288e
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/axilite_if.v
@@ -0,0 +1,210 @@
+/*
+ Copyright (c) 2020, Xilinx
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name of FINN nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+module axi4lite_if //AXI4-Lite slave front-end: maps AW/W/B and AR/R traffic onto a simple ip_en/ip_wen/ip_addr/ip_wdata/ip_rdata port
+#(
+    parameter ADDR_WIDTH = 32,
+    parameter DATA_WIDTH = 32,//AXI4 spec requires this to be strictly 32 or 64 (checked in the initial block below)
+    parameter IP_DATA_WIDTH = 64//can be any power-of-2 multiple of DATA_WIDTH
+)
+(
+//system signals
+input aclk,
+input aresetn,//active low, asynchronous assertion and synchronous deassertion
+
+//Write channels
+//write address
+output reg                  awready,
+input                       awvalid,
+input [ADDR_WIDTH-1:0]      awaddr,
+input [2:0]                 awprot,//not evaluated by this module
+//write data
+output reg                  wready,
+input                       wvalid,
+input [DATA_WIDTH-1:0]      wdata,
+input [(DATA_WIDTH/8)-1:0]  wstrb,//not evaluated by this module: wdata is always taken as a whole word
+//write response (B channel)
+input                       bready,
+output reg                  bvalid,
+output reg [1:0]            bresp,//NOTE: 00 = OKAY, 10 = SLVERR (write error)
+
+//Read channels
+//read address
+output reg                  arready,
+input                       arvalid,
+input [ADDR_WIDTH-1:0]      araddr,
+input [2:0]                 arprot,//not evaluated by this module
+//read data
+input                       rready,
+output reg                  rvalid,
+output reg [1:0]            rresp,//NOTE: 00 = OKAY, 10 = SLVERR (read error)
+output reg [DATA_WIDTH-1:0] rdata,
+
+//IP-side interface
+output reg                  ip_en,//pulses for every forwarded read or completed (last-fold) write
+output reg                  ip_wen,//pulses together with ip_en when the access is a write
+output reg [ADDR_WIDTH-1:0] ip_addr,//address in units of IP_DATA_WIDTH words (fold bits removed)
+output [IP_DATA_WIDTH-1:0]  ip_wdata,
+input                       ip_rack,//read acknowledge from the IP: ip_rdata is captured into rdata when set
+input [IP_DATA_WIDTH-1:0]      ip_rdata
+);
+
+localparam RESP_OKAY = 2'b00;
+localparam RESP_SLVERR = 2'b10;
+//get ceil(log2(ceil(IP_DATA_WIDTH/DATA_WIDTH))), i.e. the number of address bits selecting a DATA_WIDTH slice ("fold") of the IP word
+localparam NFOLDS_LOG = $clog2((IP_DATA_WIDTH + DATA_WIDTH - 1) / DATA_WIDTH);
+
+reg                      internal_ren;//read accepted in this cycle
+reg                      internal_wen;//write accepted in this cycle
+reg                      internal_wack;//internal_wen delayed by one cycle, triggers bvalid
+reg [ADDR_WIDTH-1:0]     internal_raddr;//read address in DATA_WIDTH words
+reg [ADDR_WIDTH-1:0]     internal_waddr;//write address in DATA_WIDTH words
+reg [DATA_WIDTH-1:0]     internal_wdata;
+wire [DATA_WIDTH-1:0]    internal_rdata;
+reg                      internal_error = 0;//never assigned anywhere else in this module, so bresp/rresp always report OKAY
+
+//check DATA_WIDTH
+initial begin
+    if(DATA_WIDTH != 32 & DATA_WIDTH != 64) begin
+        $display("AXI4Lite DATA_WIDTH must be 32 or 64");
+        $finish;
+    end
+end
+
+//transaction state machine: one outstanding read or write at a time
+localparam  STATE_IDLE  = 0,
+            STATE_READ  = 1,
+            STATE_WRITE = 2;
+
+reg [1:0] state;
+
+always @(posedge aclk or negedge aresetn)
+    if(~aresetn)
+        state <= STATE_IDLE;
+    else case(state)
+        STATE_IDLE:
+            if(awvalid & wvalid)//a write needs address and data valid in the same cycle and takes priority over a read
+                state <= STATE_WRITE;
+            else if(arvalid)
+                state <= STATE_READ;
+        STATE_READ:
+            if(rvalid & rready)//read response handshake completes the transaction
+                state <= STATE_IDLE;
+        STATE_WRITE:
+            if(bvalid & bready)//write response handshake completes the transaction
+                state <= STATE_IDLE;
+        default: state <= STATE_IDLE;
+    endcase
+
+//write-related internal signals
+always @(*) begin
+    internal_waddr = awaddr >> $clog2(DATA_WIDTH/8);//byte address -> DATA_WIDTH word index
+    internal_wdata = wdata;
+    internal_wen = (state == STATE_IDLE) & awvalid & wvalid;
+end
+
+always @(posedge aclk) begin//awready/wready are registered: they pulse one cycle after the write was accepted
+    awready <= internal_wen;
+    wready <= internal_wen;
+end
+
+//read-related internal signals
+always @(*) begin
+    internal_raddr = araddr >> $clog2(DATA_WIDTH/8);//byte address -> DATA_WIDTH word index
+    internal_ren = (state == STATE_IDLE) & ~internal_wen & arvalid;//reads yield to a write accepted in the same cycle
+end
+
+always @(posedge aclk)//arready is registered: it pulses one cycle after the read was accepted
+    arready <= internal_ren;
+
+wire write_to_last_fold;
+
+always @(posedge aclk) begin//registered IP-side strobes and address
+    ip_wen <= write_to_last_fold;
+    ip_en <= internal_ren | write_to_last_fold;
+    if(internal_ren | write_to_last_fold)
+        ip_addr <= internal_ren ? (internal_raddr >> NFOLDS_LOG) : (internal_waddr >> NFOLDS_LOG);//strip the fold-select bits
+    internal_wack <= internal_wen;
+end
+
+genvar i;
+reg [(1<<NFOLDS_LOG)*DATA_WIDTH-1:0] ip_wdata_wide;//write word assembled from (1<<NFOLDS_LOG) bus-width slices
+generate
+if(NFOLDS_LOG == 0) begin: no_fold//IP word fits into a single bus word
+    assign write_to_last_fold = internal_wen;
+    assign internal_rdata = ip_rdata;
+    always @(posedge aclk)
+        ip_wdata_wide <= internal_wdata;
+end else begin: fold//IP word spans several bus words
+    reg [NFOLDS_LOG-1:0] internal_rfold;//fold index remembered from the accepted read address
+    assign write_to_last_fold = internal_wen & (internal_waddr[NFOLDS_LOG-1:0] == {(NFOLDS_LOG){1'b1}});//only a write to the highest fold is forwarded to the IP
+    assign internal_rdata = ip_rdata >> (internal_rfold*DATA_WIDTH);//pick the addressed slice of the read word
+    always @(posedge aclk)
+        if(internal_ren)
+            internal_rfold <= internal_raddr[NFOLDS_LOG-1:0];
+    for(i=0; i<(1<<NFOLDS_LOG); i = i+1) begin: gen_wdata
+        always @(posedge aclk)
+            if(internal_waddr[NFOLDS_LOG-1:0] == i)//NOTE(review): slice capture is not qualified by internal_wen - presumably harmless because ip_wen marks valid data; confirm
+                ip_wdata_wide[(i+1)*DATA_WIDTH-1:i*DATA_WIDTH] <= internal_wdata;
+    end
+end
+endgenerate
+assign ip_wdata = ip_wdata_wide[IP_DATA_WIDTH-1:0];//drop any bits above IP_DATA_WIDTH
+
+//write response on AXI4L bus: raised the cycle after internal_wack, dropped once bready is seen
+always @(posedge aclk or negedge aresetn)
+    if(~aresetn) begin
+        bvalid <= 0;//AXI4 spec requires BVALID pulled LOW during reset
+        bresp <= RESP_OKAY;
+    end else if(internal_wack) begin
+        bvalid <= 1;
+        bresp <= internal_error ? RESP_SLVERR : RESP_OKAY;
+    end else if(bready) begin
+        bvalid <= 0;
+        bresp <= RESP_OKAY;
+    end
+
+//read response on AXI4L bus: raised when the IP acknowledges the read, dropped once rready is seen
+always @(posedge aclk or negedge aresetn)
+    if(~aresetn) begin
+        rvalid <= 0;//AXI4 spec requires RVALID pulled LOW during reset
+        rdata <= 0;
+        rresp <= RESP_OKAY;
+    end else if(ip_rack) begin
+        rvalid <= 1;
+        rdata <= internal_rdata;
+        rresp <= internal_error ? RESP_SLVERR : RESP_OKAY;
+    end else if(rready) begin
+        rvalid <= 0;
+        rdata <= 0;
+        rresp <= RESP_OKAY;
+    end
+
+endmodule
diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index d16a9219d7..bfd7e5d8ff 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -47,11 +47,13 @@ module thresholding #(
 	int unsigned  N,  // output precision
 	int unsigned  K,  // input/threshold precision
 	int unsigned  C,  // number of channels
+	int unsigned  PE, // parallel processing elements
 
 	bit  SIGNED = 1,  // signed inputs
+	bit  FPARG  = 0,  // floating-point inputs: [sign] | exponent | mantissa
 	int  BIAS   = 0,  // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
-	localparam int unsigned  C_BITS = C < 2? 1 : $clog2(C),
+	localparam int unsigned  CF = C/PE,  // Channel fold
 	localparam int unsigned  O_BITS = BIAS >= 0?
 		/* unsigned */ $clog2(2**N+BIAS) :
 		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
@@ -61,101 +63,238 @@ module thresholding #(
 	input	logic  rst,
 
 	// Threshold Configuration
-	input	logic  twe,
-	input	logic [$clog2(C)+N-1:0]  twa,
-	input	logic [          K-1:0]  twd,
-
-	// Clock Enable for Stream Processing
-	input	logic  en,
+	input	logic  cfg_en,
+	input	logic  cfg_we,
+	input	logic [$clog2(CF)+$clog2(PE)+N-1:0]  cfg_a,
+	input	logic [K-1:0]  cfg_d,
+	output	logic  cfg_rack,
+	output	logic [K-1:0]  cfg_q,
 
 	// Input Stream
+	output	logic  irdy,
 	input	logic  ivld,
-	input	logic [C_BITS-1:0]  icnl,	// Ignored for C == 1
-	input	logic [K     -1:0]  idat,
+	input	logic [PE-1:0][K-1:0]  idat,
 
 	// Output Stream
+	input	logic  ordy,
 	output	logic  ovld,
-	output	logic [C_BITS-1:0]  ocnl,
-	output	logic [O_BITS-1:0]  odat
+	output	logic [PE-1:0][O_BITS-1:0]  odat
 );
 
-	// Pipeline Links & Feed
+	// Parameter Constraints Checking: C must be an integer multiple of PE
+	initial begin
+		if(CF*PE != C) begin
+			$error("Channel count C=%0d is not a multiple of parallelism PE=%0d.", C, PE);
+			$finish;
+		end
+	end
+
+	// Operations within Pipeline
+	typedef enum logic [1:0] {
+		NOP = 2'b00, // No operation
+		TH  = 2'b01, // Thresholding
+		WR  = 2'b11, // Write (initialization)
+		RB  = 2'b10, // Readback (validation)
+		CFG = 2'b1x  // Config op (pointer-preserving)
+	} op_e;
+
+	// Pipeline Link Type
+	typedef logic [$clog2(CF)+N-1:0]  ptr_t;
+	typedef logic [K           -1:0]  val_t;
 	typedef struct packed {
-		logic               vld;	// Valid data identification
-		logic [C_BITS-1:0]  cnl;	// Channel
-		logic [K     -1:0]  val;	// Original input value
-		logic [0:N-1]       res;	// Assembling result with valid prefix [0:stage] after stage #stage
+		op_e   op;
+		ptr_t  ptr;	// WR/RB: address;         TH: result
+		val_t  val;	// WR/RB: threshold value; TH: input value
 	} pipe_t;
-	uwire pipe_t  pipe[0:N];
-	assign	pipe[0] = pipe_t'{ vld: ivld, cnl: icnl, val: idat, res: {N{1'bx}} };	// Feed original input
 
-	// Stages: 0, 1, ..., N-1
-	uwire [0:N-1]  tws = (twa[N-1:0]+1) & ~twa[N-1:0];   // Write Select per stage by address suffix
-	for(genvar  stage = 0; stage < N; stage++) begin : genStages
+	//-----------------------------------------------------------------------
+	// Pipeline Feed
+	//	- configuration always takes precedence
+	//	- number of pending thresholding ops capped to N+3
+	//	  across pipeline and output FIFO: pipe:N + A:1 + B:1 + 1
+	pipe_t  pipe[PE][N+1];
+	if(1) begin : blkFeed
+
+		// Thresholding Input Guard ensuring Output FIFO is never overrun
+		logic signed [$clog2(N+3):0]  GuardSem = N+2;	// N+2, N+1, ..., 0, -1
+		uwire  th_full = GuardSem[$left(GuardSem)];
+		always_ff @(posedge clk) begin
+			if(rst)  GuardSem <= N+2;
+			else begin
+				automatic logic  dec = !cfg_en && !th_full && ivld;
+				automatic logic  inc = ovld && ordy;
+				GuardSem <= GuardSem + (inc == dec? 0 : inc? 1 : -1);
+			end
+		end
 
-		// Threshold Memory
-		uwire [K-1:0]  thresh;
-		if(1) begin : blkUpdate
+		// PE Configuration Address Decoding
+		uwire  cfg_sel[PE];
+		if(PE == 1)  assign  cfg_sel[0] = 1;
+		else begin
+			for(genvar  pe = 0; pe < PE; pe++) begin
+				assign	cfg_sel[pe] = cfg_en && (cfg_a[N+:$clog2(PE)] == pe);
+			end
+		end
 
-			// Write control: local select from global address
-			uwire  we = twe && tws[stage];
-			if((C == 1) && (stage == 0)) begin
-				logic [K-1:0]  Thresh = 'x;
-				always_ff @(posedge clk) begin
-					if(we)  Thresh <= twd;
+
+		uwire ptr_t  iptr;
+		assign	iptr[0+:N] = cfg_a[0+:N];
+		if(CF > 1) begin
+			// Channel Fold Rotation
+			logic [$clog2(CF)-1:0]  CnlCnt = 0;
+			logic                   CnlLst = 0;
+			always_ff @(posedge clk) begin
+				if(rst) begin
+					CnlCnt <= 0;
+					CnlLst <= 0;
+				end
+				else if(!cfg_en && !th_full && ivld) begin
+					CnlCnt <= CnlCnt + (CnlLst? 1-CF : 1);
+					CnlLst <= CnlCnt == CF-2;
 				end
-				assign  thresh = Thresh;
 			end
-			else begin
-				logic [K-1:0]  Threshs[C * 2**stage];
-				uwire [$clog2(C)+stage-1:0]  wa = twa[$left(twa):N-stage];
-				uwire [$clog2(C)+stage-1:0]  ra;
-				if(C > 1)  assign  ra[stage+:C_BITS] = pipe[stage].cnl;
-				if(stage)  assign  ra[stage-1:0]     = pipe[stage].res[0:stage-1];
-
-				// Write
-				always_ff @(posedge clk) begin
-					if(we)  Threshs[wa] <= twd;
+
+			assign  iptr[N+:$clog2(CF)] = cfg_en? cfg_a[N+$clog2(PE)+:$clog2(CF)] : CnlCnt;
+		end
+
+		for(genvar  pe = 0; pe < PE; pe++) begin
+			assign	pipe[pe][0] = '{
+				op:  cfg_en?
+					(!cfg_sel[pe]? NOP : cfg_we? WR : RB) :
+					(ivld && !th_full? TH : NOP),
+				ptr: iptr,
+				val: !cfg_en? idat[pe] : cfg_we? cfg_d : 0
+			};
+		end
+
+		assign	irdy = !cfg_en && !th_full;
+	end : blkFeed
+
+	//-----------------------------------------------------------------------
+	// Free-Running Thresholding Pipeline
+	for(genvar  stage = 0; stage < N; stage++) begin : genStages
+
+		localparam int unsigned  SN = N-1-stage;
+		for(genvar  pe = 0; pe < PE; pe++) begin : genPE
+			uwire pipe_t  p = pipe[pe][stage];
+			uwire  cs = (p.ptr[SN:0] == 2**SN-1);
+
+			// Threshold Memory
+			logic [K-1:0]  Thresh = 'x;	// Read-out register
+			if(1) begin : blkThreshMem
+				uwire  we = (p.op ==? WR) && cs;
+				if((CF == 1) && (stage == 0)) begin
+					always_ff @(posedge clk) begin
+						if(we)  Thresh <= p.val;
+					end
+				end
+				else begin
+					logic [K-1:0]  Threshs[CF * 2**stage];
+					uwire [$clog2(CF)+stage-1:0]  addr = p.ptr[$clog2(CF)+N-1:SN+1];
+					always_ff @(posedge clk) begin
+						if(we)  Threshs[addr] <= p.val;
+						Thresh <= Threshs[addr];
+					end
 				end
+			end : blkThreshMem
 
-				// Read
-				logic [K-1:0]  RdReg;
-				always_ff @(posedge clk) begin
-					if(en)  RdReg <= Threshs[ra];
+			// Pipeline State
+			pipe_t  P = '{ op: NOP, default: 'x };
+			logic   Reval = 0;
+			always_ff @(posedge clk) begin
+				if(rst) begin
+					P <= '{ op: NOP, default: 'x };
+					Reval <= 0;
+				end
+				else begin
+					P <= p;
+					Reval <= (p.op ==? RB) && cs;
 				end
-				assign	thresh = RdReg;
 			end
 
-		end : blkUpdate
+			logic  cmp;
+			if(!SIGNED)		assign	cmp = $unsigned(Thresh) <= $unsigned(P.val);
+			else if(!FPARG)	assign	cmp =   $signed(Thresh) <=   $signed(P.val);
+			else begin : blkSignedFloat
+				uwire  mag_eq = Thresh[K-2:0] == P.val[K-2:0];
+				uwire  mag_le = Thresh[K-2:0] <= P.val[K-2:0];
+				always_comb begin
+					unique case({Thresh[K-1], P.val[K-1]})
+					2'b00:  cmp = mag_le;
+					2'b01:  cmp = 0;
+					2'b10:  cmp = 1;
+					2'b11:  cmp = !mag_le || mag_eq;
+					default: cmp = 'x;
+					endcase
+				end
+			end : blkSignedFloat
+			always_comb begin
+				automatic pipe_t  pp = P;
+				if(P.op !=? CFG)  pp.ptr[SN] = cmp;
+				if(Reval)         pp.val = Thresh;
+				pipe[pe][stage+1] = pp;
+			end
 
-		// Pipeline regs simply copying the input
-		pipe_t  State = '{ vld: 0, cnl: 'x, val: 'x, res: 'x };
-		always_ff @(posedge clk) begin
-			if(rst)      State <= '{ vld: 0, cnl: 'x, val: 'x, res: 'x };
-			else if(en)  State <= pipe[stage];
-		end
+		end : genPE
+	end : genStages
 
-		// Assemble pipeline data
-		logic [0:N-1]  res;
-		uwire  cmp =
-			SIGNED?      $signed(thresh) <=   $signed(State.val) :
-			/* else */ $unsigned(thresh) <= $unsigned(State.val);
-		always_comb begin
-			res        = State.res;
-			res[stage] = cmp;	// Patch in next result bit
+	//-----------------------------------------------------------------------
+	// Configuration Readback
+	always_comb begin
+		cfg_rack = 0;
+		cfg_q = 0;
+		foreach(pipe[pe]) begin
+			automatic pipe_t  p = pipe[pe][N];
+			cfg_rack |= p.op ==? RB;
+			cfg_q    |= p.val;
 		end
-		assign	pipe[stage+1] = '{
-			vld: State.vld,
-			cnl: State.cnl,
-			val: State.val,
-			res: res
-		};
+	end
 
-	end : genStages
+	//-----------------------------------------------------------------------
+	// Stream Output through FIFO
+	//	- FIFO depth of N+2 + Output Reg to allow pipe to drain entirely under backpressure
+	//	- Typically mapped to an SRL shift register
+	if(1) begin : blkStreamOutput
+		localparam int unsigned  A_DEPTH = N+2;
+		logic        [PE-1 : 0][N-1 : 0]  ADat[A_DEPTH];
+		logic signed [$clog2(A_DEPTH):0]  APtr = '1;	// -1, 0, 1, ..., A_DEPTH-1
+		uwire  avld = !APtr[$left(APtr)];
 
-	// Output
-	assign	ovld = pipe[N].vld;
-	assign	ocnl = pipe[N].cnl;
-	assign	odat = pipe[N].res + BIAS;
+		logic [PE-1:0][N-1:0]  BDat = 'x;
+		logic  BVld =  0;
+
+		uwire  aload = pipe[0][N].op ==? TH;
+		uwire  bload = !BVld || ordy;
+
+		always_ff @(posedge clk) begin
+			if(aload) begin
+				assert(APtr < $signed(A_DEPTH-1)) else begin
+					$error("Overrun after failing stream guard.");
+					$stop;
+				end
+				foreach(pipe[pe])  ADat[0][pe] <= pipe[pe][N].ptr;
+				for(int unsigned  i = 1; i < A_DEPTH; i++)  ADat[i] <= ADat[i-1];
+			end
+		end
+		always_ff @(posedge clk) begin
+			if(rst)  APtr <= '1;
+			else     APtr <= APtr + (aload == (avld && bload)? 0 : aload? 1 : -1);
+		end
+		always_ff @(posedge clk) begin
+			if(rst) begin
+				BDat <= 'x;
+				BVld <=  0;
+			end
+			else if(bload) begin
+				BDat <= ADat[APtr];
+				BVld <= avld;
+			end
+		end
+
+		assign	ovld = BVld;
+		for(genvar  pe = 0; pe < PE; pe++) begin
+			assign	odat[pe] = BDat[pe] + BIAS;
+		end
+	end : blkStreamOutput
 
 endmodule : thresholding
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 2f0393a3e7..98bbe20691 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -39,15 +39,16 @@
  *****************************************************************************/
 
 module thresholding_axi #(
-	int unsigned  N,	// output precision
-	int unsigned  K,	// input/threshold precision
-	int unsigned  C,	// Channels
-	int unsigned  PE,	// Processing Parallelism, requires C = k*PE
+	int unsigned  N =  4,	// output precision
+	int unsigned  K = 16,	// input/threshold precision
+	int unsigned  C = 1,	// Channels
+	int unsigned  PE = 1,	// Processing Parallelism, requires C = k*PE
 
 	bit  SIGNED = 1,	// signed inputs
+	bit  FPARG  = 0,	// floating-point inputs: [sign] | exponent | mantissa
 	int  BIAS   = 0,	// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
-	localparam int unsigned  CF = 1 + (C-1)/PE,	// Channel Fold
+	localparam int unsigned  CF = C/PE,	// Channel Fold
 	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
 	localparam int unsigned  O_BITS = BIAS >= 0?
 		/* unsigned */ $clog2(2**N+BIAS) :
@@ -73,9 +74,9 @@ module thresholding_axi #(
 	output	logic [1:0]  s_axilite_BRESP,
 
 	// Reading
-	input	logic        s_axilite_ARVALID,
-	output	logic        s_axilite_ARREADY,
-	input	logic [0:0]  s_axilite_ARADDR,
+	input	logic                  s_axilite_ARVALID,
+	output	logic                  s_axilite_ARREADY,
+	input	logic [ADDR_BITS-1:0]  s_axilite_ARADDR,
 
 	output	logic         s_axilite_RVALID,
 	input	logic         s_axilite_RREADY,
@@ -92,154 +93,39 @@ module thresholding_axi #(
 	output	logic  m_axis_tvalid,
 	output	logic [((PE*O_BITS+7)/8)*8-1:0]  m_axis_tdata
 );
-	//- Parameter Constraints Checking --------------------------------------
-	initial begin
-		if(C%PE != 0) begin
-			$error("%m: Channel count C=%0d is not a multiple of PE=%0d.", C, PE);
-			$finish;
-		end
-	end
-
-	//- Global Control ------------------------------------------------------
-	uwire  clk = ap_clk;
-	uwire  rst = !ap_rst_n;
-
-	//- AXI Lite: Threshold Configuration -----------------------------------
-	uwire  twe[PE];
-	uwire [$clog2(CF)+N-1:0]  twa;
-	uwire [           K-1:0]  twd;
-	if(1) begin : blkAxiLite
-		logic  WABusy = 0;
-		logic  WDBusy = 0;
-		logic  Sel[PE] = '{ default: 'x };
-		logic [$clog2(CF)+N-1:0]  Addr = 'x;
-		logic [           K-1:0]  Data = 'x;
-
-		for(genvar  pe = 0; pe < PE; pe++) begin
-			assign	twe[pe] = WABusy && WDBusy && Sel[pe];
-		end
-		assign	twa = Addr;
-		assign	twd = Data;
-
-		if(PE == 1)  always_comb  Sel[0] = 1;
-		else begin
-			always_ff @(posedge clk) begin
-				if(!WABusy) begin
-					foreach(Sel[pe])  Sel[pe] <= s_axilite_AWADDR[N+2+:$clog2(PE)] == pe;
-				end
-			end
-		end
-
-		uwire  clr_wr = rst || (WABusy && WDBusy && s_axilite_BREADY);
-		always_ff @(posedge clk) begin
-			if(clr_wr) begin
-				WABusy <= 0;
-				Addr <= 'x;
-				WDBusy <= 0;
-				Data <= 'x;
-			end
-			else begin
-				if(!WABusy) begin
-					WABusy <= s_axilite_AWVALID;
-					Addr[0+:N] <= s_axilite_AWADDR[2+:N];
-					if(CF > 1)  Addr[N+:$clog2(CF)] <= s_axilite_AWADDR[2+N+$clog2(PE)+:$clog2(CF)];
-				end
-				if(!WDBusy) begin
-					WDBusy <= s_axilite_WVALID;
-					Data   <= s_axilite_WDATA[K-1:0];
-				end
-			end
-		end
-		assign	s_axilite_AWREADY = !WABusy;
-		assign	s_axilite_WREADY  = !WDBusy;
-		assign	s_axilite_BVALID  = WABusy && WDBusy;
-		assign	s_axilite_BRESP   = '0; // OK
-
-		// Answer all reads with '1
-		logic  RValid =  0;
-		uwire  clr_rd = rst || (RValid && s_axilite_RREADY);
-		always_ff @(posedge clk) begin
-			if(clr_rd)        RValid <=  0;
-			else if(!RValid)  RValid <= s_axilite_ARVALID;
-		end
-		assign	s_axilite_ARREADY = !RValid;
-		assign	s_axilite_RVALID  = RValid;
-		assign	s_axilite_RDATA   = '1;
-		assign	s_axilite_RRESP   = '0; // OK
-
-	end : blkAxiLite
-
-	//- IO-Sandwich with two-stage output buffer for containing a local enable
-	uwire  en;
-	uwire [PE-1:0][O_BITS-1:0]  odat;
-	uwire  ovld[PE];
-	if(1) begin : blkOutputDecouple
-		typedef struct {
-			logic  vld;
-			logic [PE-1:0][O_BITS-1:0]  dat;
-		} buf_t;
-		buf_t  A = '{ vld: 0, dat: 'x };
-		buf_t  B = '{ vld: 0, dat: 'x };
-		always_ff @(posedge clk) begin
-			if(rst) begin
-				A <= '{ vld: 0, dat: 'x };
-				B <= '{ vld: 0, dat: 'x };
-			end
-			else begin
-				if(!B.vld || m_axis_tready) begin
-					B <= '{
-						vld: A.vld || ovld[0],
-						dat: A.vld? A.dat : odat
-					};
-				end
-				A.vld <= B.vld && !m_axis_tready && (A.vld || ovld[0]);
-				if(!A.vld)  A.dat <= odat;
-			end
-		end
-		assign	en = !A.vld;
-
-		assign	m_axis_tvalid = B.vld;
-		assign	m_axis_tdata  = B.dat;
-
-	end : blkOutputDecouple
-
-	localparam int unsigned  C_BITS = C/PE < 2? 1 : $clog2(C/PE);
-	uwire  ivld = s_axis_tvalid;
-	uwire [C_BITS-1:0]  icnl;
-	uwire [K     -1:0]  idat[PE];
-	for(genvar  pe = 0; pe < PE; pe++) begin
-		assign	idat[pe] = s_axis_tdata[pe*K+:K];
-	end
-
-	assign	s_axis_tready = en;
-	if(C == PE)  assign  icnl = 'x;
-	else begin
-		logic [C_BITS-1:0]  Chnl = 0;
-		logic               Last = 0;
-		uwire  inc = ivld && en;
-		uwire  clr = rst || (Last && inc);
-		always_ff @(posedge clk) begin
-			if(clr) begin
-				Chnl <= 0;
-				Last <= 0;
-			end
-			else if(inc) begin
-				Chnl <= Chnl + 1;
-				Last <= (~Chnl & (C/PE-2)) == 0;
-			end
-		end
-		assign	icnl = Chnl;
-	end
-
-	// Core Thresholding Modules
-	for(genvar  pe = 0; pe < PE; pe++) begin : genCores
-		thresholding #(.N(N), .K(K), .C(C/PE), .SIGNED(SIGNED), .BIAS(BIAS)) core (
-			.clk, .rst,
-			.twe(twe[pe]), .twa, .twd,
-			.en,
-			.ivld, .icnl, .idat(idat[pe]),
-			.ovld(ovld[pe]), .ocnl(), .odat(odat[pe])
-		);
-	end : genCores
+
+	//-----------------------------------------------------------------------
+	// AXI-lite Configuration Interface
+	uwire  cfg_en;
+	uwire  cfg_we;
+	uwire [ADDR_BITS-1:0]  cfg_a;
+	uwire [K        -1:0]  cfg_d;
+	uwire  cfg_rack;
+	uwire [K        -1:0]  cfg_q;
+	axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi (
+		.aclk(ap_clk), .aresetn(ap_rst_n),
+
+		.awready(s_axilite_AWREADY), .awvalid(s_axilite_AWVALID), .awaddr(s_axilite_AWADDR), .awprot('x),
+		.wready(s_axilite_WREADY),   .wvalid(s_axilite_WVALID),   .wdata(s_axilite_WDATA),   .wstrb(s_axilite_WSTRB),
+		.bready(s_axilite_BREADY),   .bvalid(s_axilite_BVALID),   .bresp(s_axilite_BRESP),
+
+		.arready(s_axilite_ARREADY), .arvalid(s_axilite_ARVALID), .araddr(s_axilite_ARADDR), .arprot('x),
+		.rready(s_axilite_RREADY),   .rvalid(s_axilite_RVALID),   .rresp(s_axilite_RRESP),   .rdata(s_axilite_RDATA),
+
+		.ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a), .ip_wdata(cfg_d),
+		.ip_rack(cfg_rack), .ip_rdata(cfg_q)
+	);
+
+	//-----------------------------------------------------------------------
+	// Kernel Implementation
+	thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS)) impl (
+		.clk(ap_clk), .rst(!ap_rst_n),
+
+		.cfg_en, .cfg_we, .cfg_a, .cfg_d,
+		.cfg_rack, .cfg_q,
+
+		.irdy(s_axis_tready), .ivld(s_axis_tvalid), .idat(s_axis_tdata),
+		.ordy(m_axis_tready), .ovld(m_axis_tvalid), .odat(m_axis_tdata)
+	);
 
 endmodule : thresholding_axi
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
deleted file mode 100644
index 2657b39d98..0000000000
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ /dev/null
@@ -1,130 +0,0 @@
-/******************************************************************************
- * Copyright (C) 2022, Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- *  1. Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *
- *  3. Neither the name of the copyright holder nor the names of its
- *     contributors may be used to endorse or promote products derived from
- *     this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * @brief	IPI-compatible Verilog wrapper for thresholding_axi module.
- * @author	Thomas B. Preußer <tpreusse@amd.com>
- *****************************************************************************/
-
-module $MODULE_NAME_AXI_WRAPPER$ #(
-	parameter  N = $N$,	// output precision
-	parameter  K = $M$,	// input/threshold precision
-	parameter  C = $C$,	// Channels
-	parameter  PE = $PE$,
-	parameter  SIGNED = $SIGNED$,	// signed inputs
-	parameter  BIAS = $BIAS$,		// offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS)
-
-	parameter  O_BITS = BIAS > 0?
-		/* unsigned */ $clog2(2**N+BIAS) :
-		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
-)(
-	//- Global Control ------------------
-	(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
-	input	ap_clk,
-	(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
-	input	ap_rst_n,
-
-	//- AXI Lite ------------------------
-	// Writing
-	input	                   s_axilite_AWVALID,
-	output	                   s_axilite_AWREADY,
-	input	[$clog2(C)+N+1:0]  s_axilite_AWADDR,
-
-	input	        s_axilite_WVALID,
-	output	        s_axilite_WREADY,
-	input	[31:0]  s_axilite_WDATA,
-	input	[ 3:0]  s_axilite_WSTRB,
-
-	output	       s_axilite_BVALID,
-	input	       s_axilite_BREADY,
-	output	[1:0]  s_axilite_BRESP,
-
-	// Reading
-	input	       s_axilite_ARVALID,
-	output	       s_axilite_ARREADY,
-	input	[0:0]  s_axilite_ARADDR,
-
-	output	        s_axilite_RVALID,
-	input	        s_axilite_RREADY,
-	output	[31:0]  s_axilite_RDATA,
-	output	[ 1:0]  s_axilite_RRESP,
-
-	//- AXI Stream - Input --------------
-	output	in0_V_TREADY,
-	input	in0_V_TVALID,
-	input	[((PE*K+7)/8)*8-1:0]  in0_V_TDATA,
-
-	//- AXI Stream - Output -------------
-	input	out_V_TREADY,
-	output	out_V_TVALID,
-	output	[((PE*O_BITS+7)/8)*8-1:0]  out_V_TDATA
-);
-
-	thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .BIAS(BIAS)) inst (
-		//- Global Control ------------------
-		.ap_clk(ap_clk),
-		.ap_rst_n(ap_rst_n),
-
-		//- AXI Lite ------------------------
-		// Writing
-		.s_axilite_AWVALID(s_axilite_AWVALID),
-		.s_axilite_AWREADY(s_axilite_AWREADY),
-		.s_axilite_AWADDR(s_axilite_AWADDR),
-
-		.s_axilite_WVALID(s_axilite_WVALID),
-		.s_axilite_WREADY(s_axilite_WREADY),
-		.s_axilite_WDATA(s_axilite_WDATA),
-		.s_axilite_WSTRB(s_axilite_WSTRB),
-
-		.s_axilite_BVALID(s_axilite_BVALID),
-		.s_axilite_BREADY(s_axilite_BREADY),
-		.s_axilite_BRESP(s_axilite_BRESP),
-
-		// Reading
-		.s_axilite_ARVALID(s_axilite_ARVALID),
-		.s_axilite_ARREADY(s_axilite_ARREADY),
-		.s_axilite_ARADDR(s_axilite_ARADDR),
-
-		.s_axilite_RVALID(s_axilite_RVALID),
-		.s_axilite_RREADY(s_axilite_RREADY),
-		.s_axilite_RDATA(s_axilite_RDATA),
-		.s_axilite_RRESP(s_axilite_RRESP),
-
-		//- AXI Stream - Input --------------
-		.s_axis_tready(in0_V_TREADY),
-		.s_axis_tvalid(in0_V_TVALID),
-		.s_axis_tdata(in0_V_TDATA),
-
-		//- AXI Stream - Output -------------
-		.m_axis_tready(out_V_TREADY),
-		.m_axis_tvalid(out_V_TVALID),
-		.m_axis_tdata(out_V_TDATA)
-	);
-
-endmodule : $MODULE_NAME_AXI_WRAPPER$
diff --git a/finn-rtllib/thresholding/sim/thresholding.tcl b/finn-rtllib/thresholding/sim/thresholding.tcl
new file mode 100644
index 0000000000..82dc59deb1
--- /dev/null
+++ b/finn-rtllib/thresholding/sim/thresholding.tcl
@@ -0,0 +1,17 @@
+create_project -force thresholding thresholding.vivado -part xcvc1902-vsva2197-2MP-e-S
+set_property board_part xilinx.com:vck190:part0:2.2 [current_project]
+
+read_verilog hdl/axilite_if.v
+read_verilog -sv { hdl/thresholding.sv hdl/thresholding_axi.sv }
+
+set simset [current_fileset -simset]
+set_property -name xsim.simulate.log_all_signals -value true -objects $simset
+set_property -name xsim.simulate.runtime -value all -objects $simset
+add_files -fileset $simset { sim/thresholding_tb.sv sim/thresholding_axi_tb.sv }
+
+foreach top { thresholding_tb thresholding_axi_tb } {
+	set_property top $top $simset
+
+	launch_simulation
+	close_sim
+}
diff --git a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
new file mode 100644
index 0000000000..200d4d5999
--- /dev/null
+++ b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
@@ -0,0 +1,314 @@
+/******************************************************************************
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *	 this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the copyright holder nor the names of its
+ *	 contributors may be used to endorse or promote products derived from
+ *	 this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @brief	Testbench for thresholding_axi.
+ * @author	Monica Chiosa <monica.chiosa@amd.com>
+ *
+ */
+
+module thresholding_axi_tb #(
+	int unsigned  N  = 4,	// output precision
+	int unsigned  C  = 6,	// number of channels
+	int unsigned  PE = 2,
+	real  M0 = 7.3,			// slope of the uniform thresholding line
+	real  B0 = 3.1,			// offset of the uniform thresholding line
+	bit  THROTTLED = 1,
+
+	localparam int unsigned  CF = C/PE,	// Channel Fold
+	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2
+);
+
+	//-----------------------------------------------------------------------
+	// Design Geometry
+
+	// For each channel c in [0,C):
+	//	 M_c = M0 + CX*c
+	//	 B_c = B0 + CX*c
+	// Input/threshold precision computed according to the maximum possible value
+	localparam real  CX = 1.375;
+	localparam int unsigned K = $clog2((2**N-1)*(M0+C*CX) + (B0+C*CX)); // unused sign + magnitude
+	localparam int unsigned C_BITS = C < 2? 1 : $clog2(C);
+
+	localparam int unsigned MST_STRM_WROUNDS = 503;
+
+	typedef int unsigned  threshs_t[C][2**N-1];
+	function threshs_t init_thresholds();
+		automatic threshs_t  res;
+		for(int unsigned  c = 0; c < C; c++) begin
+			automatic real  m = M0 + c*CX;
+			automatic real  b = B0 + c*CX;
+			foreach(res[c][i]) begin
+				res[c][i] = int'($ceil(m*i + b));
+			end
+		end
+		return  res;
+	endfunction : init_thresholds
+	localparam threshs_t  THRESHS = init_thresholds();
+
+	//-----------------------------------------------------------------------
+	// Clock and Reset Control
+	logic  clk = 0;
+	always #5ns clk = !clk;
+	logic  rst = 1;
+	initial begin
+		#10ns;
+		@(posedge clk);
+		rst <= 0;
+	end
+
+	//-----------------------------------------------------------------------
+	// DUT
+	logic                  s_axilite_AWVALID;
+	uwire                  s_axilite_AWREADY;
+	logic [ADDR_BITS-1:0]  s_axilite_AWADDR;	// lowest 2 bits (byte selectors) are ignored
+	logic                  s_axilite_WVALID;
+	uwire                  s_axilite_WREADY;
+	logic [         31:0]  s_axilite_WDATA;
+	uwire                  s_axilite_BVALID;
+	logic                  s_axilite_BREADY;
+	uwire [          1:0]  s_axilite_BRESP;
+	logic                  s_axilite_ARVALID;
+	uwire                  s_axilite_ARREADY;
+	logic [ADDR_BITS-1:0]  s_axilite_ARADDR;
+	uwire                  s_axilite_RVALID;
+	uwire                  s_axilite_RREADY = 1;
+	uwire [         31:0]  s_axilite_RDATA;
+	uwire [          1:0]  s_axilite_RRESP;
+
+	uwire  irdy;
+	logic  ivld;
+	logic [PE-1:0][K-1:0]  idat;
+
+	logic  ordy = 0;
+	uwire  ovld;
+	uwire [PE-1:0][N-1:0]  odat;
+
+	thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0)) dut (
+		.ap_clk(clk), .ap_rst_n(!rst),
+
+		// Configuration
+		.s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR,
+		.s_axilite_WVALID,  .s_axilite_WREADY,  .s_axilite_WDATA, .s_axilite_WSTRB('1),
+		.s_axilite_BVALID,  .s_axilite_BREADY,  .s_axilite_BRESP,
+		.s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR,
+		.s_axilite_RVALID,  .s_axilite_RREADY,  .s_axilite_RDATA, .s_axilite_RRESP,
+
+		// Stream Processing
+		.s_axis_tready(irdy), .s_axis_tvalid(ivld), .s_axis_tdata(idat),
+		.m_axis_tready(ordy), .m_axis_tvalid(ovld), .m_axis_tdata(odat)
+	);
+
+	//-----------------------------------------------------------------------
+	// Input Stimuli
+	typedef logic [PE-1:0][K-1:0]  input_t;
+	typedef logic [$clog2(CF)+$clog2(PE)+N-1:0]  addr_t;
+	input_t  QW[$];  // Input Feed Tracing
+	addr_t   QC[$];
+
+	int unsigned  error_cnt = 0;
+	bit  done = 0;
+	initial begin
+		// Report testbench details
+		$display("Testbench - tresholding K=%0d -> N=%0d", K, N);
+		for(int unsigned  c = 0; c < C; c++) begin
+			$write("Channel #%0d: Thresholds = {", c);
+			for(int unsigned  i = 0; i < 2**N-1; i++)  $write(" %0d", THRESHS[c][i]);
+			$display(" }");
+		end
+
+		// Config
+		s_axilite_AWVALID = 0;
+		s_axilite_AWADDR  = 'x;
+		s_axilite_WVALID  = 0;
+		s_axilite_WDATA   = 'x;
+		s_axilite_BREADY  = 0;
+		s_axilite_ARVALID = 0;
+		s_axilite_ARADDR  = 'x;
+
+		// Stream Input
+		ivld = 0;
+		idat = 'x;
+
+		@(posedge clk iff !rst);
+
+		// Threshold Configuration
+		for(int unsigned  c = 0; c < C; c+=PE) begin
+			automatic addr_t  addr = 0;
+			if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = c/PE;
+			for(int unsigned  pe = 0; pe < PE; pe++) begin
+				if(PE > 1)  addr[N+:$clog2(PE)] = pe;
+				for(int unsigned  t = 0; t < 2**N-1; t++) begin
+					addr[0+:N] = t;
+					fork
+						begin
+							s_axilite_AWVALID <= 1;
+							s_axilite_AWADDR  <= { addr, 2'b00 };
+							@(posedge clk iff s_axilite_AWREADY);
+							s_axilite_AWVALID <= 0;
+							s_axilite_AWADDR  <= 'x;
+						end
+						begin
+							s_axilite_WVALID <= 1;
+							s_axilite_WDATA  <= THRESHS[c+pe][t];
+							@(posedge clk iff s_axilite_WREADY);
+							s_axilite_WVALID <= 0;
+							s_axilite_WDATA  <= 'x;
+						end
+						begin
+							s_axilite_BREADY <= 1;
+							@(posedge clk iff s_axilite_BVALID);
+							assert(s_axilite_BRESP == '0) else begin
+								$error("Error on parameter write.");
+								$stop;
+							end
+							s_axilite_BREADY <= 0;
+						end
+					join
+				end
+			end
+		end
+
+		fork
+			// Intermittent configuration readback: idle most cycles, occasionally read a random threshold
+			while(!done) begin
+				if(($urandom()%37) != 0) begin
+					s_axilite_ARVALID <= 0;
+					s_axilite_ARADDR  <= 'x;
+					@(posedge clk);
+				end
+				else begin
+					automatic addr_t  addr = $urandom()%(2**N-1);	// any valid threshold index: 0 .. 2**N-2
+					if(PE > 1)  addr[N+:$clog2(PE)] = $urandom()%PE;
+					if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = $urandom()%CF;
+
+					s_axilite_ARVALID <= 1;
+					s_axilite_ARADDR  <= { addr, 2'b00 };	// word address -> byte address
+					@(posedge clk iff s_axilite_ARREADY);
+
+					QC.push_back(addr);	// expected reply is checked in order by the readback checker
+				end
+			end
+
+			// AXI4Stream MST Writes input values
+			repeat(MST_STRM_WROUNDS) begin
+				automatic input_t  dat;
+
+				while(THROTTLED && ($urandom()%7 == 0)) @(posedge clk);
+
+				std::randomize(dat);
+				ivld <= 1;
+				idat <= dat;
+				@(posedge clk iff irdy);
+				ivld <=  0;
+				idat <= 'x;
+				QW.push_back(dat);
+			end
+		join_any
+		done <= 1;
+		repeat(N+6)  @(posedge clk);
+
+		assert(QW.size() == 0) else begin
+			$error("Missing %0d outputs.", QW.size());
+			$stop;
+		end
+		assert(QC.size() == 0) else begin
+			$error("Missing %0d readback replies.", QC.size());
+			$stop;
+		end
+
+		$display("Test completed: %0d errors in %0d tests.", error_cnt, MST_STRM_WROUNDS);
+		$display("=========================================");
+		$finish;
+	end
+
+	// Output Checker -------------------------------------------------------
+
+	// Configuration Readback
+	always_ff @(posedge clk iff s_axilite_RVALID) begin
+		assert(s_axilite_RRESP == '0) else begin
+			$error("Read back error.");
+			$stop;
+		end
+		assert(QC.size()) begin
+			automatic addr_t  addr = QC.pop_front();
+			automatic int unsigned  cnl =
+				(CF == 1? 0 : addr[N+$clog2(PE)+:$clog2(CF)] * PE) +
+				(PE == 1? 0 : addr[N+:$clog2(PE)]);
+			automatic logic [K-1:0]  exp = THRESHS[cnl][addr[0+:N]];
+			assert(s_axilite_RDATA == exp) else begin
+				$error("Readback mismatch on #%0d.%0d: %0d instead of %0d", cnl, addr[0+:N], s_axilite_RDATA, exp);
+				$stop;
+			end
+		end
+		else begin
+			$error("Spurious readback output.");
+			$stop;
+		end
+	end
+
+	// Stream Output
+	int unsigned  OCnl = 0;
+	always @(posedge clk) begin
+		if(rst) begin
+			OCnl <= 0;
+			ordy <= 1'b0;
+		end
+		else begin
+			if(!ordy || ovld)  ordy <= ($urandom()%5 != 0) || !THROTTLED;
+
+			if(ordy && ovld) begin
+				assert(QW.size()) begin
+					automatic input_t  x = QW.pop_front();
+
+					for(int unsigned  pe = 0; pe < PE; pe++) begin
+						automatic int unsigned  cnl = OCnl + pe;
+
+						$display("Mapped CNL=%0d DAT=%3d -> #%2d", cnl, x[pe], odat[pe]);
+						assert(
+							((odat[pe] == 0) || (THRESHS[cnl][odat[pe]-1] <= x[pe])) &&
+							((odat[pe] == 2**N-1) || (x[pe] < THRESHS[cnl][odat[pe]]))
+						) else begin
+							$error("Output error on presumed input CNL=%0d DAT=0x%0x -> #%0d", cnl, x[pe], odat[pe]);
+							error_cnt++;
+							$stop;
+						end
+					end
+				end
+				else begin
+					$error("Spurious output.");
+					$stop;
+				end
+
+				OCnl <= (OCnl + PE)%C;
+			end
+		end
+	end
+
+endmodule: thresholding_axi_tb
diff --git a/finn-rtllib/thresholding/sim/thresholding_tb.sv b/finn-rtllib/thresholding/sim/thresholding_tb.sv
new file mode 100644
index 0000000000..90dfba1022
--- /dev/null
+++ b/finn-rtllib/thresholding/sim/thresholding_tb.sv
@@ -0,0 +1,272 @@
+/******************************************************************************
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *	 this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the copyright holder nor the names of its
+ *	 contributors may be used to endorse or promote products derived from
+ *	 this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @brief	Testbench for thresholding.
+ * @author	Monica Chiosa <monica.chiosa@amd.com>
+ *
+ */
+
+module thresholding_tb #(	// Self-checking testbench for the thresholding core
+	int unsigned  K  = 10,	// input precision
+	int unsigned  N  =  4,	// output precision (2**N-1 thresholds per channel)
+	int unsigned  C  =  6,	// number of channels
+	int unsigned  PE =  2,	// channels handled in parallel per stream transfer
+
+	localparam int unsigned  CF = C/PE	// Channel Fold
+);
+	localparam int unsigned  MST_STRM_WROUNDS = 507;	// number of input transfers driven per instance
+	localparam bit  THROTTLED = 1;	// 1: insert random stalls on stream input and output
+
+	//-----------------------------------------------------------------------
+	// Clock and Reset Control
+	logic  clk = 0;
+	always #5ns clk = !clk;
+	logic  rst = 1;
+	initial begin
+		#10ns;
+		@(posedge clk);
+		rst <= 0;
+	end
+
+	//-----------------------------------------------------------------------
+	// Parallel Instances differing in Data Type
+	typedef logic [K -1:0]  val_t;
+	typedef val_t  threshs_t[C][2**N-1];
+	typedef val_t [PE-1:0]  input_t;
+	typedef logic [$clog2(CF)+$clog2(PE)+N-1:0]  addr_t;	// { fold index, PE index, threshold index }
+	logic [0:2]  term = '0;	// one completion flag per generated instance
+	always_comb begin
+		if(&term)  $finish;	// end the simulation once all three instances are done
+	end
+	for(genvar  i = 0; i < 3; i++) begin : genTypes	// i=0: unsigned int, i=1: signed int, i=2: signed fp
+		localparam bit  SIGNED = i>0;
+		localparam bit  FPARG  = i>1;
+
+		//- DUT -------------------------
+		logic  cfg_en;
+		logic  cfg_we;
+		logic [$clog2(C)+N-1:0]  cfg_a;
+		logic [K-1:0]  cfg_d;
+		uwire  cfg_rack;
+		uwire [K-1:0]  cfg_q;
+
+		uwire  irdy;
+		logic  ivld;
+		logic [PE-1:0][K-1:0]  idat;
+
+		logic  ordy = 0;
+		uwire  ovld;
+		uwire [PE-1:0][N-1:0]  odat;
+
+		thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG)) dut (
+			.clk, .rst,
+
+			// Configuration
+			.cfg_en, .cfg_we, .cfg_a, .cfg_d,
+			.cfg_rack, .cfg_q,
+
+			// Stream Processing
+			.irdy, .ivld, .idat,
+			.ordy, .ovld, .odat
+		);
+
+		//- Stimulus Driver -------------
+		threshs_t  THRESHS;	// reference thresholds, sorted per channel by sigord()
+		function val_t sigord(input val_t  x);	// ordering key used for sorting thresholds and checking outputs
+			automatic val_t  res = x;
+			if(SIGNED) begin
+				if(FPARG && x[K-1])  res[K-2:0] = ~x[K-2:0];	// FPARG with MSB set: invert the remaining bits
+				res[K-1] = !x[K-1];	// SIGNED: flip the MSB
+			end
+			return  res;
+		endfunction : sigord
+
+		input_t  QW[$];  // Input tracing
+		addr_t   QC[$];  // Readback tracking
+		int unsigned  error_cnt = 0;
+		bit  done = 0;	// set once all stream inputs were sent; stops the readback thread
+		initial begin
+
+			// Generate thresholds: random values, sorted per channel by their sigord() key
+			std::randomize(THRESHS);
+			foreach(THRESHS[c]) begin
+				val_t  row[2**N-1] = THRESHS[c];
+				row.sort with (sigord(item));
+				THRESHS[c] = row;
+			end
+
+			// Report test case details
+			$display("[%0d] Thresholding %s%s%0d -> uint%0d", i, SIGNED? "s" : "u", FPARG? "fp" : "int", K, N);
+			for(int unsigned  c = 0; c < C; c++) begin
+				$write("[%0d] Channel #%0d: Thresholds = {", i, c);
+				for(int unsigned  t = 0; t < 2**N-1; t++)  $write(" %0X", THRESHS[c][t]);	// t, so the genvar i is not shadowed
+				$display(" }");
+			end
+
+			// Config
+			cfg_en = 0;
+			cfg_we = 'x;
+			cfg_a  = 'x;
+			cfg_d  = 'x;
+
+			// Stream Input
+			ivld = 0;
+			idat = 'x;
+
+			@(posedge clk iff !rst);
+
+			// Threshold Configuration: one write per clock, address = { fold index, PE index, threshold index }
+			cfg_en <= 1;
+			cfg_we <= 1;
+			for(int unsigned  c = 0; c < C; c+=PE) begin
+				if(CF > 1)  cfg_a[N+$clog2(PE)+:$clog2(CF)] <= c/PE;
+				for(int unsigned  pe = 0; pe < PE; pe++) begin
+					if(PE > 1)  cfg_a[N+:$clog2(PE)] <= pe;	// nonblocking like the other cfg_a fields: no race with the clock edge
+					for(int unsigned  t = 0; t < 2**N-1; t++) begin
+						cfg_a[0+:N] <= t;
+						cfg_d <= THRESHS[c+pe][t];
+						@(posedge clk);
+					end
+				end
+			end
+			cfg_d <= 'x;
+
+			fork
+				// Intermittent configuration readback: single-cycle reads at random addresses until done
+				forever begin
+					cfg_en <= 0;
+					cfg_we <= 'x;
+					cfg_a  <= 'x;
+					if(done)  break;	// leave only after the port was driven idle, so no read stays asserted
+					@(posedge clk);
+					if(($urandom()%37) == 0) begin
+						automatic addr_t  addr = $urandom()%(2**N-1);	// any of the 2**N-1 threshold indices
+						if(PE > 1)  addr[N+:$clog2(PE)] = $urandom()%PE;
+						if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = $urandom()%CF;
+						cfg_en <= 1;
+						cfg_we <= 0;
+						cfg_a  <= addr;
+						@(posedge clk);
+						QC.push_back(addr);	// remembered for the Readback Checker
+					end
+				end
+
+				// AXI4Stream MST Writes input values
+				repeat(MST_STRM_WROUNDS) begin
+					automatic input_t  dat;
+
+					while(THROTTLED && ($urandom()%7 == 0)) @(posedge clk);	// random idle cycles before the next transfer
+
+					std::randomize(dat);
+					ivld <= 1;
+					idat <= dat;
+					@(posedge clk iff irdy);	// hold the data until the DUT accepts it
+					ivld <=  0;
+					idat <= 'x;
+					QW.push_back(dat);	// remembered for the Output Checker
+				end
+			join_any
+			done <= 1;
+			repeat(N+6)  @(posedge clk);	// fixed drain time; NOTE(review): presumably covers the DUT latency - confirm
+
+			assert(QW.size() == 0) else begin
+				$error("[%0d] Missing %0d outputs.", i, QW.size());
+				$stop;
+			end
+			assert(QC.size() == 0) else begin
+				$error("[%0d] Missing %0d readback replies.", i, QC.size());
+				$stop;
+			end
+
+			$display("[%0d] Test completed: %0d errors in %0d tests.", i, error_cnt, MST_STRM_WROUNDS);
+			$display("=============================================");
+			term[i] <= 1;
+		end
+
+		//- Readback Checker --------------
+		always_ff @(posedge clk iff cfg_rack) begin	// every acknowledged read must match the front of QC
+			assert(QC.size()) begin
+				automatic addr_t  addr = QC.pop_front();
+				automatic int unsigned  cnl =	// channel = fold index * PE + PE index
+					(CF == 1? 0 : addr[N+$clog2(PE)+:$clog2(CF)] * PE) +
+					(PE == 1? 0 : addr[N+:$clog2(PE)]);
+				automatic logic [K-1:0]  exp = THRESHS[cnl][addr[0+:N]];
+				assert(cfg_q == exp) else begin
+					$error("[%0d] Readback mismatch on #%0d.%0d: %0d instead of %0d", i, cnl, addr[0+:N], cfg_q, exp);
+					$stop;
+				end
+			end
+			else begin	// acknowledge without a pending read address
+				$error("[%0d] Spurious readback output.", i);
+				$stop;
+			end
+		end
+
+		// Output Checker: drives ordy and checks each accepted output against the input popped from QW
+		int unsigned  OCnl = 0;	// first channel of the next expected output word
+		always @(posedge clk) begin
+			if(rst) begin
+				OCnl <= 0;
+				ordy <= 1'b0;
+			end
+			else begin
+				if(!ordy || ovld)  ordy <= ($urandom()%5 != 0) || !THROTTLED;	// THROTTLED: low in about one of five draws
+
+				if(ordy && ovld) begin
+					assert(QW.size()) begin
+						automatic input_t  x = QW.pop_front();
+
+						for(int unsigned  pe = 0; pe < PE; pe++) begin
+							automatic int unsigned  cnl = OCnl + pe;
+
+							$display("[%0d] Mapped CNL=%0d DAT=%3x -> #%2d", i, cnl, x[pe], odat[pe]);
+							assert(	// result k is accepted iff key(THRESHS[k-1]) <= key(x) < key(THRESHS[k]), bounds waived at the ends
+								((odat[pe] == 0) || (sigord(THRESHS[cnl][odat[pe]-1]) <= sigord(x[pe]))) &&
+								((odat[pe] == 2**N-1) || (sigord(x[pe]) < sigord(THRESHS[cnl][odat[pe]])))
+							) else begin
+								$error("[%0d] Output error on presumed input CNL=%0d DAT=0x%0x -> #%0d", i, cnl, x[pe], odat[pe]);
+								error_cnt++;
+								$stop;
+							end
+						end
+					end
+					else begin	// output produced although no input is pending in QW
+						$error("[%0d] Spurious output.", i);
+						$stop;
+					end
+
+					OCnl <= (OCnl + PE)%C;	// advance by PE channels, wrapping after C
+				end
+			end
+		end
+
+	end : genTypes
+
+endmodule: thresholding_tb

From 28e5ad7d81d32f0bf26aac773aa50db40a289c55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 27 Jun 2023 06:50:20 +0100
Subject: [PATCH 097/111] Packaged IP for RTL thresholding implementation.

---
 finn-rtllib/thresholding/component.xml        | 1002 +++++++++++++++++
 .../gui/thresholding_axi_v1_0.gtcl            |    4 +
 .../thresholding/hdl/thresholding_axi.sv      |    4 +-
 .../hdl/thresholding_axi_wrapper.v            |  110 ++
 .../xgui/thresholding_axi_v1_0.tcl            |  187 +++
 5 files changed, 1305 insertions(+), 2 deletions(-)
 create mode 100644 finn-rtllib/thresholding/component.xml
 create mode 100644 finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl
 create mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
 create mode 100644 finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl

diff --git a/finn-rtllib/thresholding/component.xml b/finn-rtllib/thresholding/component.xml
new file mode 100644
index 0000000000..e28a3a2c2d
--- /dev/null
+++ b/finn-rtllib/thresholding/component.xml
@@ -0,0 +1,1002 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<spirit:component xmlns:xilinx="http://www.xilinx.com" xmlns:spirit="http://www.spiritconsortium.org/XMLSchema/SPIRIT/1685-2009" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <spirit:vendor>amd.com</spirit:vendor>
+  <spirit:library>finn</spirit:library>
+  <spirit:name>thresholding_axi</spirit:name>
+  <spirit:version>1.0</spirit:version>
+  <spirit:busInterfaces>
+    <spirit:busInterface>
+      <spirit:name>ap_clk</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock_rtl" spirit:version="1.0"/>
+      <spirit:slave/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>CLK</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>ap_clk</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+      <spirit:parameters>
+        <spirit:parameter>
+          <spirit:name>ASSOCIATED_RESET</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_RESET">ap_rst_n</spirit:value>
+        </spirit:parameter>
+        <spirit:parameter>
+          <spirit:name>ASSOCIATED_BUSIF</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_BUSIF">s_axilite:s_axis:m_axis</spirit:value>
+        </spirit:parameter>
+        <spirit:parameter>
+          <spirit:name>FREQ_TOLERANCE_HZ</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.FREQ_TOLERANCE_HZ">-1</spirit:value>
+        </spirit:parameter>
+      </spirit:parameters>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>m_axis</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis_rtl" spirit:version="1.0"/>
+      <spirit:master/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>m_axis_tdata</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>m_axis_tvalid</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>m_axis_tready</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>s_axis</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis_rtl" spirit:version="1.0"/>
+      <spirit:slave/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axis_tdata</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axis_tvalid</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axis_tready</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>s_axilite</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm_rtl" spirit:version="1.0"/>
+      <spirit:slave>
+        <spirit:memoryMapRef spirit:memoryMapRef="s_axilite"/>
+      </spirit:slave>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>AWADDR</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_AWADDR</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>AWVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_AWVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>AWREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_AWREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WDATA</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WSTRB</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WSTRB</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>BRESP</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_BRESP</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>BVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_BVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>BREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_BREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>ARADDR</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_ARADDR</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>ARVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_ARVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>ARREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_ARREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RDATA</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RRESP</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RRESP</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>ap_rst_n</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset_rtl" spirit:version="1.0"/>
+      <spirit:slave/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RST</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>ap_rst_n</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+      <spirit:parameters>
+        <spirit:parameter>
+          <spirit:name>POLARITY</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_RST_N.POLARITY" spirit:choiceRef="choice_list_9d8b0d81">ACTIVE_LOW</spirit:value>
+        </spirit:parameter>
+      </spirit:parameters>
+    </spirit:busInterface>
+  </spirit:busInterfaces>
+  <spirit:memoryMaps>
+    <spirit:memoryMap>
+      <spirit:name>s_axilite</spirit:name>
+      <spirit:displayName>s_axilite</spirit:displayName>
+      <spirit:addressBlock>
+        <spirit:name>reg0</spirit:name>
+        <spirit:displayName>reg0</spirit:displayName>
+        <spirit:baseAddress spirit:format="bitString" spirit:bitStringLength="1">0x0</spirit:baseAddress>
+        <spirit:range spirit:format="long" spirit:resolve="dependent" spirit:dependency="pow(2,(spirit:decode(id(&apos;MODELPARAM_VALUE.ADDR_BITS&apos;)) - 1) - 0 + 1)" spirit:minimum="4096" spirit:rangeType="long">4096</spirit:range>
+        <spirit:width spirit:format="long">32</spirit:width>
+        <spirit:usage>register</spirit:usage>
+      </spirit:addressBlock>
+    </spirit:memoryMap>
+  </spirit:memoryMaps>
+  <spirit:model>
+    <spirit:views>
+      <spirit:view>
+        <spirit:name>xilinx_anylanguagesynthesis</spirit:name>
+        <spirit:displayName>Synthesis</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:synthesis</spirit:envIdentifier>
+        <spirit:language>Verilog</spirit:language>
+        <spirit:modelName>thresholding_axi_wrapper</spirit:modelName>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_anylanguagesynthesis_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>fd0bd85b</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+      <spirit:view>
+        <spirit:name>xilinx_anylanguagebehavioralsimulation</spirit:name>
+        <spirit:displayName>Simulation</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:simulation</spirit:envIdentifier>
+        <spirit:language>Verilog</spirit:language>
+        <spirit:modelName>thresholding_axi_wrapper</spirit:modelName>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_anylanguagebehavioralsimulation_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>fd0bd85b</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+      <spirit:view>
+        <spirit:name>xilinx_xpgui</spirit:name>
+        <spirit:displayName>UI Layout</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:xgui.ui</spirit:envIdentifier>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_xpgui_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>fc6b9b63</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+      <spirit:view>
+        <spirit:name>xilinx_utilityxitfiles</spirit:name>
+        <spirit:displayName>Utility XIT/TTCL</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:xit.util</spirit:envIdentifier>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_utilityxitfiles_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>8b0215cd</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+    </spirit:views>
+    <spirit:ports>
+      <spirit:port>
+        <spirit:name>ap_clk</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>ap_rst_n</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_AWVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_AWREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_AWADDR</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="(spirit:decode(id(&apos;MODELPARAM_VALUE.ADDR_BITS&apos;)) - 1)">5</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WDATA</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">31</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WSTRB</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">3</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">1</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_BVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_BREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_BRESP</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">1</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_ARVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_ARREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_ARADDR</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="(spirit:decode(id(&apos;MODELPARAM_VALUE.ADDR_BITS&apos;)) - 1)">5</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RDATA</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">31</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RRESP</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">1</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axis_tready</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axis_tvalid</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axis_tdata</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="(((((spirit:decode(id(&apos;MODELPARAM_VALUE.PE&apos;)) * spirit:decode(id(&apos;MODELPARAM_VALUE.K&apos;))) + 7) / 8) * 8) - 1)">15</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>m_axis_tready</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">1</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>m_axis_tvalid</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>m_axis_tdata</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="(((((spirit:decode(id(&apos;MODELPARAM_VALUE.PE&apos;)) * spirit:decode(id(&apos;MODELPARAM_VALUE.O_BITS&apos;))) + 7) / 8) * 8) - 1)">7</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+    </spirit:ports>
+    <spirit:modelParameters>
+      <spirit:modelParameter xsi:type="spirit:nameValueTypeType" spirit:dataType="integer">
+        <spirit:name>N</spirit:name>
+        <spirit:displayName>N</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.N">4</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>K</spirit:name>
+        <spirit:displayName>K</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.K">16</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>C</spirit:name>
+        <spirit:displayName>C</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.C">1</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>PE</spirit:name>
+        <spirit:displayName>Pe</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.PE">1</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>SIGNED</spirit:name>
+        <spirit:displayName>Signed</spirit:displayName>
+        <spirit:value spirit:format="bool" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.SIGNED">true</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>FPARG</spirit:name>
+        <spirit:displayName>Fparg</spirit:displayName>
+        <spirit:value spirit:format="bool" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.FPARG">false</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>BIAS</spirit:name>
+        <spirit:displayName>Bias</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.BIAS">0</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>CF</spirit:name>
+        <spirit:displayName>Cf</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.CF" spirit:dependency="(spirit:decode(id(&apos;MODELPARAM_VALUE.C&apos;)) / spirit:decode(id(&apos;MODELPARAM_VALUE.PE&apos;)))">1</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>ADDR_BITS</spirit:name>
+        <spirit:displayName>Addr Bits</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.ADDR_BITS" spirit:dependency="(((spirit:ceil(spirit:log(2,spirit:decode(id(&apos;MODELPARAM_VALUE.CF&apos;)))) + spirit:ceil(spirit:log(2,spirit:decode(id(&apos;MODELPARAM_VALUE.PE&apos;))))) + spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;))) + 2)">6</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>O_BITS</spirit:name>
+        <spirit:displayName>O Bits</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.O_BITS" spirit:dependency="spirit:ceil(spirit:log(2,((2 ** spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;))) + spirit:decode(id(&apos;MODELPARAM_VALUE.BIAS&apos;)))))">4</spirit:value>
+      </spirit:modelParameter>
+    </spirit:modelParameters>
+  </spirit:model>
+  <spirit:choices>
+    <spirit:choice>
+      <spirit:name>choice_list_9d8b0d81</spirit:name>
+      <spirit:enumeration>ACTIVE_HIGH</spirit:enumeration>
+      <spirit:enumeration>ACTIVE_LOW</spirit:enumeration>
+    </spirit:choice>
+  </spirit:choices>
+  <spirit:fileSets>
+    <spirit:fileSet>
+      <spirit:name>xilinx_anylanguagesynthesis_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>hdl/thresholding.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi_wrapper.v</spirit:name>
+        <spirit:fileType>verilogSource</spirit:fileType>
+        <spirit:userFileType>CHECKSUM_7b8c102d</spirit:userFileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/axilite_if.v</spirit:name>
+        <spirit:fileType>verilogSource</spirit:fileType>
+        <spirit:userFileType>CHECKSUM_69d1ba26</spirit:userFileType>
+        <spirit:logicalName>xil_defaultlib</spirit:logicalName>
+      </spirit:file>
+    </spirit:fileSet>
+    <spirit:fileSet>
+      <spirit:name>xilinx_anylanguagebehavioralsimulation_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>hdl/thresholding.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi_wrapper.v</spirit:name>
+        <spirit:fileType>verilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/axilite_if.v</spirit:name>
+        <spirit:fileType>verilogSource</spirit:fileType>
+        <spirit:userFileType>USED_IN_ipstatic</spirit:userFileType>
+        <spirit:logicalName>xil_defaultlib</spirit:logicalName>
+      </spirit:file>
+    </spirit:fileSet>
+    <spirit:fileSet>
+      <spirit:name>xilinx_xpgui_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>xgui/thresholding_axi_v1_0.tcl</spirit:name>
+        <spirit:fileType>tclSource</spirit:fileType>
+        <spirit:userFileType>CHECKSUM_fc6b9b63</spirit:userFileType>
+        <spirit:userFileType>XGUI_VERSION_2</spirit:userFileType>
+      </spirit:file>
+    </spirit:fileSet>
+    <spirit:fileSet>
+      <spirit:name>xilinx_utilityxitfiles_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>gui/thresholding_axi_v1_0.gtcl</spirit:name>
+        <spirit:userFileType>GTCL</spirit:userFileType>
+      </spirit:file>
+    </spirit:fileSet>
+  </spirit:fileSets>
+  <spirit:description>MultiThreshold</spirit:description>
+  <spirit:parameters>
+    <spirit:parameter>
+      <spirit:name>N</spirit:name>
+      <spirit:displayName>Output Precision</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.N">4</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>K</spirit:name>
+      <spirit:displayName>Input Precision</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.K">16</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>C</spirit:name>
+      <spirit:displayName>Channels</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.C">1</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>PE</spirit:name>
+      <spirit:displayName>Pe</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.PE">1</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>SIGNED</spirit:name>
+      <spirit:displayName>Signed Inputs</spirit:displayName>
+      <spirit:value spirit:format="bool" spirit:resolve="user" spirit:id="PARAM_VALUE.SIGNED">true</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>FPARG</spirit:name>
+      <spirit:displayName>Floating-Point Inputs</spirit:displayName>
+      <spirit:value spirit:format="bool" spirit:resolve="user" spirit:id="PARAM_VALUE.FPARG">false</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>BIAS</spirit:name>
+      <spirit:displayName>Bias</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.BIAS">0</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>CF</spirit:name>
+      <spirit:displayName>Channel Fold</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.CF">1</spirit:value>
+      <spirit:vendorExtensions>
+        <xilinx:parameterInfo>
+          <xilinx:enablement>
+            <xilinx:isEnabled xilinx:id="PARAM_ENABLEMENT.CF">false</xilinx:isEnabled>
+          </xilinx:enablement>
+        </xilinx:parameterInfo>
+      </spirit:vendorExtensions>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>ADDR_BITS</spirit:name>
+      <spirit:displayName>Address Bits</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.ADDR_BITS">6</spirit:value>
+      <spirit:vendorExtensions>
+        <xilinx:parameterInfo>
+          <xilinx:enablement>
+            <xilinx:isEnabled xilinx:id="PARAM_ENABLEMENT.ADDR_BITS">false</xilinx:isEnabled>
+          </xilinx:enablement>
+        </xilinx:parameterInfo>
+      </spirit:vendorExtensions>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>O_BITS</spirit:name>
+      <spirit:displayName>Output Value Width</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.O_BITS">4</spirit:value>
+      <spirit:vendorExtensions>
+        <xilinx:parameterInfo>
+          <xilinx:enablement>
+            <xilinx:isEnabled xilinx:id="PARAM_ENABLEMENT.O_BITS">false</xilinx:isEnabled>
+          </xilinx:enablement>
+        </xilinx:parameterInfo>
+      </spirit:vendorExtensions>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>Component_Name</spirit:name>
+      <spirit:value spirit:resolve="user" spirit:id="PARAM_VALUE.Component_Name" spirit:order="1">thresholding_axi_wrapper_v1_0</spirit:value>
+    </spirit:parameter>
+  </spirit:parameters>
+  <spirit:vendorExtensions>
+    <xilinx:coreExtensions>
+      <xilinx:supportedFamilies>
+        <xilinx:family xilinx:lifeCycle="Production">virtex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qvirtex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">versal</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintex7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qkintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qkintex7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">akintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">artix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">artix7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">aartix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qartix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">zynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qzynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">azynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">spartan7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">aspartan7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexu</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">zynquplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplusHBM</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplus58g</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintexuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">artixuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintexu</xilinx:family>
+      </xilinx:supportedFamilies>
+      <xilinx:taxonomies>
+        <xilinx:taxonomy>/UserIP</xilinx:taxonomy>
+      </xilinx:taxonomies>
+      <xilinx:displayName>thresholding_axi</xilinx:displayName>
+      <xilinx:autoFamilySupportLevel>level_1</xilinx:autoFamilySupportLevel>
+      <xilinx:definitionSource>package_project</xilinx:definitionSource>
+      <xilinx:coreRevision>2</xilinx:coreRevision>
+      <xilinx:upgrades>
+        <xilinx:canUpgradeFrom>user.org:user:thresholding_axi_wrapper:1.0</xilinx:canUpgradeFrom>
+      </xilinx:upgrades>
+      <xilinx:coreCreationDateTime>2023-06-27T05:47:20Z</xilinx:coreCreationDateTime>
+      <xilinx:tags>
+        <xilinx:tag xilinx:name="nopcore"/>
+      </xilinx:tags>
+    </xilinx:coreExtensions>
+    <xilinx:packagingInfo>
+      <xilinx:xilinxVersion>2022.2</xilinx:xilinxVersion>
+      <xilinx:checksum xilinx:scope="busInterfaces" xilinx:value="caf1c8b2"/>
+      <xilinx:checksum xilinx:scope="memoryMaps" xilinx:value="5b88f249"/>
+      <xilinx:checksum xilinx:scope="fileGroups" xilinx:value="5b2de4fb"/>
+      <xilinx:checksum xilinx:scope="ports" xilinx:value="8d01c0f6"/>
+      <xilinx:checksum xilinx:scope="hdlParameters" xilinx:value="b7d69776"/>
+      <xilinx:checksum xilinx:scope="parameters" xilinx:value="90d363ee"/>
+      <xilinx:targetDRCs>
+        <xilinx:targetDRC xilinx:tool="ipi">
+          <xilinx:targetDRCOption xilinx:name="ignore_freq_hz" xilinx:value="true"/>
+        </xilinx:targetDRC>
+      </xilinx:targetDRCs>
+    </xilinx:packagingInfo>
+  </spirit:vendorExtensions>
+</spirit:component>
diff --git a/finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl b/finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl
new file mode 100644
index 0000000000..90d73ede7e
--- /dev/null
+++ b/finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl
@@ -0,0 +1,4 @@
+# This file is automatically written.  Do not modify.
+proc gen_USERPARAMETER_CF_VALUE {C PE } {expr {$C/$PE}} ;# CF = C/PE; expr is braced so the operands are substituted only once
+proc gen_USERPARAMETER_ADDR_BITS_VALUE {C PE N } {expr {int(ceil(log($C/$PE)/log(2))+ceil(log($PE)/log(2))+$N+2)}} ;# ceil(log2(C/PE)) + ceil(log2(PE)) + N + 2; expr is braced so the operands are substituted only once
+proc gen_USERPARAMETER_O_BITS_VALUE {BIAS N } {expr {int(ceil($BIAS >= 0? log(pow(2,$N)+$BIAS)/log(2) : 1+log(-$BIAS >= pow(2,$N-1)? -$BIAS : pow(2,$N)+$BIAS)/log(2)))}} ;# output width, with a separate branch for negative BIAS; expr is braced so the operands are substituted only once
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 98bbe20691..53066901fb 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -39,8 +39,8 @@
  *****************************************************************************/
 
 module thresholding_axi #(
-	int unsigned  N =  4,	// output precision
-	int unsigned  K = 16,	// input/threshold precision
+	int unsigned  N,		// output precision
+	int unsigned  K,		// input/threshold precision
 	int unsigned  C = 1,	// Channels
 	int unsigned  PE = 1,	// Processing Parallelism, requires C = k*PE
 
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
new file mode 100644
index 0000000000..14c2c13bfd
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
@@ -0,0 +1,110 @@
+/**
+ * Copyright (c) 2023, Xilinx
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ *   list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of FINN nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @author	Thomas B. Preußer <thomas.preusser@amd.com>
+ * @brief	Verilog wrapper for IP packaging.
+ */
+
+module thresholding_axi_wrapper #(
+	parameter  N =  4,	// output precision
+	parameter  K = 16,	// input/threshold precision
+	parameter  C =  1,	// Channels
+	parameter  PE = 1,	// Processing Parallelism, requires C = k*PE
+
+	parameter  SIGNED = 1,	// signed inputs
+	parameter  FPARG  = 0,	// floating-point inputs: [sign] | exponent | mantissa
+	parameter  BIAS   = 0,	// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
+	// Derived values: used below to size ports; they are not forwarded to the core instance.
+	parameter  CF = C/PE,	// Channel Fold
+	parameter  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,	// width of the AXI-Lite address ports
+	parameter  O_BITS = $clog2(2**N+BIAS)	// bits per output value; NOTE(review): no negative-BIAS branch, unlike the GUI's O_BITS expression - confirm
+)(
+	// Global Control
+	(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF s_axilite:s_axis:m_axis, ASSOCIATED_RESET ap_rst_n" *)
+	(* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *)
+	input	ap_clk,
+	(* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *)
+	input	ap_rst_n,
+
+	//- AXI Lite ------------------------
+	// Writing
+	input                  s_axilite_AWVALID,
+	output                 s_axilite_AWREADY,
+	input [ADDR_BITS-1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
+
+	input         s_axilite_WVALID,
+	output        s_axilite_WREADY,
+	input [31:0]  s_axilite_WDATA,
+	input [ 3:0]  s_axilite_WSTRB,
+
+	output        s_axilite_BVALID,
+	input         s_axilite_BREADY,
+	output [1:0]  s_axilite_BRESP,
+
+	// Reading
+	input                  s_axilite_ARVALID,
+	output                 s_axilite_ARREADY,
+	input [ADDR_BITS-1:0]  s_axilite_ARADDR,
+
+	output         s_axilite_RVALID,
+	input          s_axilite_RREADY,
+	output [31:0]  s_axilite_RDATA,
+	output [ 1:0]  s_axilite_RRESP,
+
+	//- AXI Stream - Input --------------
+	output  s_axis_tready,
+	input   s_axis_tvalid,
+	input [((PE*K+7)/8)*8-1:0]  s_axis_tdata,	// PE*K bits rounded up to a whole number of bytes
+
+	//- AXI Stream - Output -------------
+	input   m_axis_tready,
+	output  m_axis_tvalid,
+	output [((PE*O_BITS+7)/8)*8-1:0]  m_axis_tdata	// PE*O_BITS bits rounded up to a whole number of bytes
+);
+	// Core instance. Ports are connected by explicit name because this is a plain Verilog (.v) file: the implicit .name shorthand is SystemVerilog-only.
+	thresholding_axi #(
+		.N(N), .K(K), .C(C), .PE(PE),
+		.SIGNED(SIGNED),
+		.FPARG(FPARG),
+		.BIAS(BIAS)
+	) core (
+		.ap_clk(ap_clk), .ap_rst_n(ap_rst_n),
+		// AXI-Lite write channels
+		.s_axilite_AWVALID(s_axilite_AWVALID), .s_axilite_AWREADY(s_axilite_AWREADY), .s_axilite_AWADDR(s_axilite_AWADDR),
+		.s_axilite_WVALID(s_axilite_WVALID), .s_axilite_WREADY(s_axilite_WREADY), .s_axilite_WDATA(s_axilite_WDATA), .s_axilite_WSTRB(s_axilite_WSTRB),
+		.s_axilite_BVALID(s_axilite_BVALID), .s_axilite_BREADY(s_axilite_BREADY), .s_axilite_BRESP(s_axilite_BRESP),
+		// AXI-Lite read channels
+		.s_axilite_ARVALID(s_axilite_ARVALID), .s_axilite_ARREADY(s_axilite_ARREADY), .s_axilite_ARADDR(s_axilite_ARADDR),
+		.s_axilite_RVALID(s_axilite_RVALID), .s_axilite_RREADY(s_axilite_RREADY), .s_axilite_RDATA(s_axilite_RDATA), .s_axilite_RRESP(s_axilite_RRESP),
+		// AXI streams
+		.s_axis_tready(s_axis_tready), .s_axis_tvalid(s_axis_tvalid), .s_axis_tdata(s_axis_tdata),
+		.m_axis_tready(m_axis_tready), .m_axis_tvalid(m_axis_tvalid), .m_axis_tdata(m_axis_tdata)
+	);
+
+endmodule	// thresholding_axi_wrapper
diff --git a/finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl b/finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl
new file mode 100644
index 0000000000..338304fa40
--- /dev/null
+++ b/finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl
@@ -0,0 +1,187 @@
+
+# Loading additional proc with user specified bodies to compute parameter values.
+source [file join [file dirname [file dirname [info script]]] gui/thresholding_axi_v1_0.gtcl]
+
+# Definitional proc to organize widgets for parameters.
+proc init_gui { IPINST } {
+  ipgui::add_param $IPINST -name "Component_Name"
+  #Adding Page
+  set Page_0 [ipgui::add_page $IPINST -name "Page 0"]
+  ipgui::add_param $IPINST -name "ADDR_BITS" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "BIAS" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "C" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "CF" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "FPARG" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "K" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "N" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "O_BITS" -parent ${Page_0}
+  set PE [ipgui::add_param $IPINST -name "PE" -parent ${Page_0}]
+  set_property tooltip {PE Count} ${PE}
+  ipgui::add_param $IPINST -name "SIGNED" -parent ${Page_0}
+
+
+}
+
+proc update_PARAM_VALUE.ADDR_BITS { PARAM_VALUE.ADDR_BITS PARAM_VALUE.C PARAM_VALUE.PE PARAM_VALUE.N } {
+	# Procedure called to update ADDR_BITS when any of the dependent parameters in the arguments change
+
+	set ADDR_BITS ${PARAM_VALUE.ADDR_BITS}
+	set C ${PARAM_VALUE.C}
+	set PE ${PARAM_VALUE.PE}
+	set N ${PARAM_VALUE.N}
+	set values(C) [get_property value $C]
+	set values(PE) [get_property value $PE]
+	set values(N) [get_property value $N]
+	set_property value [gen_USERPARAMETER_ADDR_BITS_VALUE $values(C) $values(PE) $values(N)] $ADDR_BITS
+}
+
+proc validate_PARAM_VALUE.ADDR_BITS { PARAM_VALUE.ADDR_BITS } {
+	# Procedure called to validate ADDR_BITS
+	return true
+}
+
+proc update_PARAM_VALUE.CF { PARAM_VALUE.CF PARAM_VALUE.C PARAM_VALUE.PE } {
+	# Procedure called to update CF when any of the dependent parameters in the arguments change
+
+	set CF ${PARAM_VALUE.CF}
+	set C ${PARAM_VALUE.C}
+	set PE ${PARAM_VALUE.PE}
+	set values(C) [get_property value $C]
+	set values(PE) [get_property value $PE]
+	set_property value [gen_USERPARAMETER_CF_VALUE $values(C) $values(PE)] $CF
+}
+
+proc validate_PARAM_VALUE.CF { PARAM_VALUE.CF } {
+	# Procedure called to validate CF
+	return true
+}
+
+proc update_PARAM_VALUE.O_BITS { PARAM_VALUE.O_BITS PARAM_VALUE.BIAS PARAM_VALUE.N } {
+	# Procedure called to update O_BITS when any of the dependent parameters in the arguments change
+
+	set O_BITS ${PARAM_VALUE.O_BITS}
+	set BIAS ${PARAM_VALUE.BIAS}
+	set N ${PARAM_VALUE.N}
+	set values(BIAS) [get_property value $BIAS]
+	set values(N) [get_property value $N]
+	set_property value [gen_USERPARAMETER_O_BITS_VALUE $values(BIAS) $values(N)] $O_BITS
+}
+
+proc validate_PARAM_VALUE.O_BITS { PARAM_VALUE.O_BITS } {
+	# Procedure called to validate O_BITS
+	return true
+}
+
+proc update_PARAM_VALUE.BIAS { PARAM_VALUE.BIAS } {
+	# Procedure called to update BIAS when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.BIAS { PARAM_VALUE.BIAS } {
+	# Procedure called to validate BIAS
+	return true
+}
+
+proc update_PARAM_VALUE.C { PARAM_VALUE.C } {
+	# Procedure called to update C when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.C { PARAM_VALUE.C } {
+	# Procedure called to validate C
+	return true
+}
+
+proc update_PARAM_VALUE.FPARG { PARAM_VALUE.FPARG } {
+	# Procedure called to update FPARG when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.FPARG { PARAM_VALUE.FPARG } {
+	# Procedure called to validate FPARG
+	return true
+}
+
+proc update_PARAM_VALUE.K { PARAM_VALUE.K } {
+	# Procedure called to update K when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.K { PARAM_VALUE.K } {
+	# Procedure called to validate K
+	return true
+}
+
+proc update_PARAM_VALUE.N { PARAM_VALUE.N } {
+	# Procedure called to update N when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.N { PARAM_VALUE.N } {
+	# Procedure called to validate N
+	return true
+}
+
+proc update_PARAM_VALUE.PE { PARAM_VALUE.PE } {
+	# Procedure called to update PE when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.PE { PARAM_VALUE.PE } {
+	# Procedure called to validate PE
+	return true
+}
+
+proc update_PARAM_VALUE.SIGNED { PARAM_VALUE.SIGNED } {
+	# Procedure called to update SIGNED when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.SIGNED { PARAM_VALUE.SIGNED } {
+	# Procedure called to validate SIGNED
+	return true
+}
+
+
+proc update_MODELPARAM_VALUE.N { MODELPARAM_VALUE.N PARAM_VALUE.N } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.N}] ${MODELPARAM_VALUE.N}
+}
+
+proc update_MODELPARAM_VALUE.K { MODELPARAM_VALUE.K PARAM_VALUE.K } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.K}] ${MODELPARAM_VALUE.K}
+}
+
+proc update_MODELPARAM_VALUE.C { MODELPARAM_VALUE.C PARAM_VALUE.C } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.C}] ${MODELPARAM_VALUE.C}
+}
+
+proc update_MODELPARAM_VALUE.PE { MODELPARAM_VALUE.PE PARAM_VALUE.PE } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.PE}] ${MODELPARAM_VALUE.PE}
+}
+
+proc update_MODELPARAM_VALUE.SIGNED { MODELPARAM_VALUE.SIGNED PARAM_VALUE.SIGNED } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.SIGNED}] ${MODELPARAM_VALUE.SIGNED}
+}
+
+proc update_MODELPARAM_VALUE.FPARG { MODELPARAM_VALUE.FPARG PARAM_VALUE.FPARG } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.FPARG}] ${MODELPARAM_VALUE.FPARG}
+}
+
+proc update_MODELPARAM_VALUE.BIAS { MODELPARAM_VALUE.BIAS PARAM_VALUE.BIAS } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.BIAS}] ${MODELPARAM_VALUE.BIAS}
+}
+
+proc update_MODELPARAM_VALUE.CF { MODELPARAM_VALUE.CF PARAM_VALUE.CF } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.CF}] ${MODELPARAM_VALUE.CF}
+}
+
+proc update_MODELPARAM_VALUE.ADDR_BITS { MODELPARAM_VALUE.ADDR_BITS PARAM_VALUE.ADDR_BITS } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.ADDR_BITS}] ${MODELPARAM_VALUE.ADDR_BITS}
+}
+
+proc update_MODELPARAM_VALUE.O_BITS { MODELPARAM_VALUE.O_BITS PARAM_VALUE.O_BITS } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.O_BITS}] ${MODELPARAM_VALUE.O_BITS}
+}

From bc5b73868d90a8fa8e9a5d59233529331d9f4369 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Wed, 15 Nov 2023 14:47:07 +0000
Subject: [PATCH 098/111] Allow for custom start-up initialization of
 thresholds.

---
 finn-rtllib/thresholding/hdl/thresholding.sv | 28 +++++++++++++++++---
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index bfd7e5d8ff..56038061c2 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -29,7 +29,7 @@
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * @brief	Pipelined thresholding by binary search.
- * @author	Thomas B. Preußer <tpreusse@amd.com>
+ * @author	Thomas B. Preußer <thomas.preusser@amd.com>
  *
  * @description
  *  Produces the N-bit count of those among 2^N-1 thresholds that are not
@@ -42,6 +42,14 @@
  *  with respect to a selectable set of thresholds. The corresponding
  *  threshold configuration relies on a channel address prefix. Inputs are
  *  accompanied by a channel selector.
+ *
+ *  Parameter Layout as seen on AXI-Lite (row by row, word addresses):
+ *            | Base                   \   Offs  |   0    1    2  ...  2^N-2    2^N-1
+ *   ---------+----------------------------------+------------------------------------
+ *    Chnl #0 |   0                              |  T_0  T_1  T_2 ... T_{2^N-2}    'x
+ *    Chnl #1 |   2^N                            |  T_0  T_1  T_2 ... T_{2^N-2}    'x
+ *    Chnl #c | ((c/PE)*2^$clog2(PE) + c%PE)*2^N |  T_0  T_1  T_2 ... T_{2^N-2}    'x
+ *
  *****************************************************************************/
 module thresholding #(
 	int unsigned  N,  // output precision
@@ -53,6 +61,9 @@ module thresholding #(
 	bit  FPARG  = 0,  // floating-point inputs: [sign] | exponent | mantissa
 	int  BIAS   = 0,  // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
+	// Initial Thresholds (per channel)
+	logic [K-1:0]  THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } },
+
 	localparam int unsigned  CF = C/PE,  // Channel fold
 	localparam int unsigned  O_BITS = BIAS >= 0?
 		/* unsigned */ $clog2(2**N+BIAS) :
@@ -136,7 +147,6 @@ module thresholding #(
 			end
 		end
 
-
 		uwire ptr_t  iptr;
 		assign	iptr[0+:N] = cfg_a[0+:N];
 		if(CF > 1) begin
@@ -180,16 +190,26 @@ module thresholding #(
 			uwire  cs = (p.ptr[SN:0] == 2**SN-1);
 
 			// Threshold Memory
-			logic [K-1:0]  Thresh = 'x;	// Read-out register
+			val_t  Thresh;	// Read-out register
 			if(1) begin : blkThreshMem
 				uwire  we = (p.op ==? WR) && cs;
 				if((CF == 1) && (stage == 0)) begin
+					initial begin
+						Thresh = THRESHOLDS[pe][2**SN-1];
+					end
 					always_ff @(posedge clk) begin
 						if(we)  Thresh <= p.val;
 					end
 				end
 				else begin
-					logic [K-1:0]  Threshs[CF * 2**stage];
+					val_t  Threshs[CF * 2**stage];
+					initial begin
+						for(int unsigned  c = 0; c < CF; c++) begin
+							for(int unsigned  i = 0; i < 2**stage; i++) begin
+								Threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1];
+							end
+						end
+					end
 					uwire [$clog2(CF)+stage-1:0]  addr = p.ptr[$clog2(CF)+N-1:SN+1];
 					always_ff @(posedge clk) begin
 						if(we)  Threshs[addr] <= p.val;

From 730bcf83e69a39e881d6b468b69001332bd78b0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Wed, 15 Nov 2023 14:49:55 +0000
Subject: [PATCH 099/111] Make AXI-Lite threshold read/write interface
 optional.

---
 .../thresholding/hdl/thresholding_axi.sv      | 38 +++++++++++++------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 53066901fb..edfbaf891c 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -48,6 +48,11 @@ module thresholding_axi #(
 	bit  FPARG  = 0,	// floating-point inputs: [sign] | exponent | mantissa
 	int  BIAS   = 0,	// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
+	// Initial Thresholds (per channel)
+	logic [K-1:0]  THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } },
+
+	bit  HAVE_AXILITE = 1,	// Activate AXI-Lite for threshold read/write
+
 	localparam int unsigned  CF = C/PE,	// Channel Fold
 	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
 	localparam int unsigned  O_BITS = BIAS >= 0?
@@ -102,19 +107,28 @@ module thresholding_axi #(
 	uwire [K        -1:0]  cfg_d;
 	uwire  cfg_rack;
 	uwire [K        -1:0]  cfg_q;
-	axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi (
-		.aclk(ap_clk), .aresetn(ap_rst_n),
-
-		.awready(s_axilite_AWREADY), .awvalid(s_axilite_AWVALID), .awaddr(s_axilite_AWADDR), .awprot('x),
-		.wready(s_axilite_WREADY),   .wvalid(s_axilite_WVALID),   .wdata(s_axilite_WDATA),   .wstrb(s_axilite_WSTRB),
-		.bready(s_axilite_BREADY),   .bvalid(s_axilite_BVALID),   .bresp(s_axilite_BRESP),
 
-		.arready(s_axilite_ARREADY), .arvalid(s_axilite_ARVALID), .araddr(s_axilite_ARADDR), .arprot('x),
-		.rready(s_axilite_RREADY),   .rvalid(s_axilite_RVALID),   .rresp(s_axilite_RRESP),   .rdata(s_axilite_RDATA),
-
-		.ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a), .ip_wdata(cfg_d),
-		.ip_rack(cfg_rack), .ip_rdata(cfg_q)
-	);
+	if(HAVE_AXILITE) begin
+		axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi (
+			.aclk(ap_clk), .aresetn(ap_rst_n),
+
+			.awready(s_axilite_AWREADY), .awvalid(s_axilite_AWVALID), .awaddr(s_axilite_AWADDR), .awprot('x),
+			.wready(s_axilite_WREADY),   .wvalid(s_axilite_WVALID),   .wdata(s_axilite_WDATA),   .wstrb(s_axilite_WSTRB),
+			.bready(s_axilite_BREADY),   .bvalid(s_axilite_BVALID),   .bresp(s_axilite_BRESP),
+
+			.arready(s_axilite_ARREADY), .arvalid(s_axilite_ARVALID), .araddr(s_axilite_ARADDR), .arprot('x),
+			.rready(s_axilite_RREADY),   .rvalid(s_axilite_RVALID),   .rresp(s_axilite_RRESP),   .rdata(s_axilite_RDATA),
+
+			.ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a), .ip_wdata(cfg_d),
+			.ip_rack(cfg_rack), .ip_rdata(cfg_q)
+		);
+	end
+	else begin
+		assign	cfg_en =  0;
+		assign	cfg_we = 'x;
+		assign	cfg_a  = 'x;
+		assign	cfg_d  = 'x;
+	end
 
 	//-----------------------------------------------------------------------
 	// Kernel Implementation

From 95d6a3eca503e388066558989022aebe951a0ce8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Thu, 16 Nov 2023 09:18:20 +0000
Subject: [PATCH 100/111] Double wrapping for both structured SystemVerilog
 parameters and an IPI-compatible Verilog top-level.

---
 .../thresholding/hdl/thresholding_axi.sv      |  10 +-
 .../hdl/thresholding_axi_tpl_inner.sv         | 116 ++++++++++++++++++
 ...wrapper.v => thresholding_axi_tpl_outer.v} |  30 ++---
 3 files changed, 139 insertions(+), 17 deletions(-)
 create mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
 rename finn-rtllib/thresholding/hdl/{thresholding_axi_wrapper.v => thresholding_axi_tpl_outer.v} (75%)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index edfbaf891c..20bdff6d25 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -51,7 +51,7 @@ module thresholding_axi #(
 	// Initial Thresholds (per channel)
 	logic [K-1:0]  THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } },
 
-	bit  HAVE_AXILITE = 1,	// Activate AXI-Lite for threshold read/write
+	bit  USE_AXILITE,	// Implement AXI-Lite for threshold read/write
 
 	localparam int unsigned  CF = C/PE,	// Channel Fold
 	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
@@ -108,7 +108,7 @@ module thresholding_axi #(
 	uwire  cfg_rack;
 	uwire [K        -1:0]  cfg_q;
 
-	if(HAVE_AXILITE) begin
+	if(USE_AXILITE) begin
 		axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi (
 			.aclk(ap_clk), .aresetn(ap_rst_n),
 
@@ -132,7 +132,11 @@ module thresholding_axi #(
 
 	//-----------------------------------------------------------------------
 	// Kernel Implementation
-	thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS)) impl (
+	thresholding #(
+		.N(N), .K(K), .C(C), .PE(PE),
+		.SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS),
+		.THRESHOLDS(THRESHOLDS)
+	) impl (
 		.clk(ap_clk), .rst(!ap_rst_n),
 
 		.cfg_en, .cfg_we, .cfg_a, .cfg_d,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
new file mode 100644
index 0000000000..4c28e391c8
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
@@ -0,0 +1,116 @@
+/**
+ * Copyright (c) 2023, Xilinx
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ *   list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of FINN nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @author	Thomas B. Preußer <thomas.preusser@amd.com>
+ * @brief	SystemVerilog inner wrapper supplying the structured THRESHOLDS parameter to thresholding_axi.
+ */
+
+module thresholding_axi_tpl_inner #(
+	int unsigned  N,	// output precision
+	int unsigned  K,	// input/threshold precision
+	int unsigned  C,	// Channels
+	int unsigned  PE,	// Processing Parallelism, requires C = k*PE
+
+	int unsigned  SIGNED,	// signed inputs
+	int unsigned  FPARG,	// floating-point inputs: [sign] | exponent | mantissa
+	int unsigned  BIAS,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
+
+	logic [K-1:0]  THRESHOLDS[C][2**N-1] = '{
+		'{ 'hC0, 'hC1, 'hC2, 'hC3, 'hC4, 'hC5, 'hC6, 'hC7, 'hC8, 'hC9, 'hCa, 'hCb, 'hCc, 'hCd, 'hCe },
+		'{ 'hD0, 'hD1, 'hD2, 'hD3, 'hD4, 'hD5, 'hD6, 'hD7, 'hD8, 'hD9, 'hDa, 'hDb, 'hDc, 'hDd, 'hDe },
+		'{ 'hE0, 'hE1, 'hE2, 'hE3, 'hE4, 'hE5, 'hE6, 'hE7, 'hE8, 'hE9, 'hEa, 'hEb, 'hEc, 'hEd, 'hEe },
+		'{ 'hF0, 'hF1, 'hF2, 'hF3, 'hF4, 'hF5, 'hF6, 'hF7, 'hF8, 'hF9, 'hFa, 'hFb, 'hFc, 'hFd, 'hFe }
+	},
+	bit  USE_AXILITE,	// Implement AXI-Lite for threshold read/write
+
+	localparam int unsigned  CF = C/PE,	// Channel Fold
+	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
+	localparam int unsigned  O_BITS = $clog2(2**N+BIAS)
+)(
+	// Global Control
+	input	ap_clk,
+	input	ap_rst_n,
+
+	//- AXI Lite ------------------------
+	// Writing
+	input                  s_axilite_AWVALID,
+	output                 s_axilite_AWREADY,
+	input [ADDR_BITS-1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
+
+	input         s_axilite_WVALID,
+	output        s_axilite_WREADY,
+	input [31:0]  s_axilite_WDATA,
+	input [ 3:0]  s_axilite_WSTRB,
+
+	output        s_axilite_BVALID,
+	input         s_axilite_BREADY,
+	output [1:0]  s_axilite_BRESP,
+
+	// Reading
+	input                  s_axilite_ARVALID,
+	output                 s_axilite_ARREADY,
+	input [ADDR_BITS-1:0]  s_axilite_ARADDR,
+
+	output         s_axilite_RVALID,
+	input          s_axilite_RREADY,
+	output [31:0]  s_axilite_RDATA,
+	output [ 1:0]  s_axilite_RRESP,
+
+	//- AXI Stream - Input --------------
+	output  s_axis_tready,
+	input   s_axis_tvalid,
+	input [((PE*K+7)/8)*8-1:0]  s_axis_tdata,
+
+	//- AXI Stream - Output -------------
+	input   m_axis_tready,
+	output  m_axis_tvalid,
+	output [((PE*O_BITS+7)/8)*8-1:0]  m_axis_tdata
+);
+
+	thresholding_axi #(
+		.N(N), .K(K), .C(C), .PE(PE),
+		.SIGNED(SIGNED),
+		.FPARG(FPARG),
+		.BIAS(BIAS),
+		.THRESHOLDS(THRESHOLDS),
+		.USE_AXILITE(USE_AXILITE)
+	) core (
+		.ap_clk, .ap_rst_n,
+
+		.s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR,
+		.s_axilite_WVALID, .s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB,
+		.s_axilite_BVALID, .s_axilite_BREADY, .s_axilite_BRESP,
+
+		.s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR,
+		.s_axilite_RVALID, .s_axilite_RREADY, .s_axilite_RDATA, .s_axilite_RRESP,
+		.s_axis_tready, .s_axis_tvalid, .s_axis_tdata,
+		.m_axis_tready, .m_axis_tvalid, .m_axis_tdata
+	);
+
+endmodule : thresholding_axi_tpl_inner
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
similarity index 75%
rename from finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
rename to finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
index 14c2c13bfd..5dfe58287d 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
@@ -31,16 +31,18 @@
  * @brief	Verilog wrapper for IP packaging.
  */
 
-module thresholding_axi_wrapper #(
+module thresholding_axi_tpl_outer #(
 	parameter  N =  4,	// output precision
 	parameter  K = 16,	// input/threshold precision
-	parameter  C =  1,	// Channels
+	parameter  C =  4,	// Channels
 	parameter  PE = 1,	// Processing Parallelism, requires C = k*PE
 
 	parameter  SIGNED = 1,	// signed inputs
 	parameter  FPARG  = 0,	// floating-point inputs: [sign] | exponent | mantissa
 	parameter  BIAS   = 0,	// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
+	parameter  USE_AXILITE = 0,	// Implement AXI-Lite for threshold read/write
+
 	parameter  CF = C/PE,	// Channel Fold
 	parameter  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
 	parameter  O_BITS = $clog2(2**N+BIAS)
@@ -88,23 +90,23 @@ module thresholding_axi_wrapper #(
 	output [((PE*O_BITS+7)/8)*8-1:0]  m_axis_tdata
 );
 
-	thresholding_axi #(
+	thresholding_axi_tpl_inner #(
 		.N(N), .K(K), .C(C), .PE(PE),
 		.SIGNED(SIGNED),
 		.FPARG(FPARG),
-		.BIAS(BIAS)
+		.BIAS(BIAS),
+		.USE_AXILITE(USE_AXILITE)
 	) core (
-		.ap_clk, .ap_rst_n,
-
-		.s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR,
-		.s_axilite_WVALID, .s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB,
-		.s_axilite_BVALID, .s_axilite_BREADY, .s_axilite_BRESP,
+		.ap_clk(ap_clk), .ap_rst_n(ap_rst_n),
 
-		.s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR,
-		.s_axilite_RVALID, .s_axilite_RREADY, .s_axilite_RDATA, .s_axilite_RRESP,
+		.s_axilite_AWVALID(s_axilite_AWVALID), .s_axilite_AWREADY(s_axilite_AWREADY), .s_axilite_AWADDR(s_axilite_AWADDR),
+		.s_axilite_WVALID(s_axilite_WVALID), .s_axilite_WREADY(s_axilite_WREADY), .s_axilite_WDATA(s_axilite_WDATA), .s_axilite_WSTRB(s_axilite_WSTRB),
+		.s_axilite_BVALID(s_axilite_BVALID), .s_axilite_BREADY(s_axilite_BREADY), .s_axilite_BRESP(s_axilite_BRESP),
 
-		.s_axis_tready, .s_axis_tvalid, .s_axis_tdata,
-		.m_axis_tready, .m_axis_tvalid, .m_axis_tdata
+		.s_axilite_ARVALID(s_axilite_ARVALID), .s_axilite_ARREADY(s_axilite_ARREADY), .s_axilite_ARADDR(s_axilite_ARADDR),
+		.s_axilite_RVALID(s_axilite_RVALID), .s_axilite_RREADY(s_axilite_RREADY), .s_axilite_RDATA(s_axilite_RDATA), .s_axilite_RRESP(s_axilite_RRESP),
+		.s_axis_tready(s_axis_tready), .s_axis_tvalid(s_axis_tvalid), .s_axis_tdata(s_axis_tdata),
+		.m_axis_tready(m_axis_tready), .m_axis_tvalid(m_axis_tvalid), .m_axis_tdata(m_axis_tdata)
 	);
 
-endmodule : thresholding_axi_wrapper
+endmodule // thresholding_axi_tpl_outer

From 8003c9145d681fa5cf39720ad280d469533dbca1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Thu, 16 Nov 2023 09:29:22 +0000
Subject: [PATCH 101/111] Templating the wrapper layers for specialization by
 FINN compiler.

---
 .../hdl/thresholding_axi_tpl_inner.sv         |  7 +---
 .../hdl/thresholding_axi_tpl_outer.v          | 32 +++++++++----------
 2 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
index 4c28e391c8..f52d8d6a31 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
@@ -41,12 +41,7 @@ module thresholding_axi_tpl_inner #(
 	int unsigned  FPARG,	// floating-point inputs: [sign] | exponent | mantissa
 	int unsigned  BIAS,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
-	logic [K-1:0]  THRESHOLDS[C][2**N-1] = '{
-		'{ 'hC0, 'hC1, 'hC2, 'hC3, 'hC4, 'hC5, 'hC6, 'hC7, 'hC8, 'hC9, 'hCa, 'hCb, 'hCc, 'hCd, 'hCe },
-		'{ 'hD0, 'hD1, 'hD2, 'hD3, 'hD4, 'hD5, 'hD6, 'hD7, 'hD8, 'hD9, 'hDa, 'hDb, 'hDc, 'hDd, 'hDe },
-		'{ 'hE0, 'hE1, 'hE2, 'hE3, 'hE4, 'hE5, 'hE6, 'hE7, 'hE8, 'hE9, 'hEa, 'hEb, 'hEc, 'hEd, 'hEe },
-		'{ 'hF0, 'hF1, 'hF2, 'hF3, 'hF4, 'hF5, 'hF6, 'hF7, 'hF8, 'hF9, 'hFa, 'hFb, 'hFc, 'hFd, 'hFe }
-	},
+	logic [K-1:0]  THRESHOLDS[C][2**N-1] = $THRESHOLDS$,
 	bit  USE_AXILITE,	// Implement AXI-Lite for threshold read/write
 
 	localparam int unsigned  CF = C/PE,	// Channel Fold
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
index 5dfe58287d..3521987b66 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
@@ -32,20 +32,18 @@
  */
 
 module thresholding_axi_tpl_outer #(
-	parameter  N =  4,	// output precision
-	parameter  K = 16,	// input/threshold precision
-	parameter  C =  4,	// Channels
-	parameter  PE = 1,	// Processing Parallelism, requires C = k*PE
+	parameter  N = $N$,	// output precision
+	parameter  K = $M$,	// input/threshold precision
+	parameter  C = $C$,	// Channels
+	parameter  PE = $PE$,	// Processing Parallelism, requires C = k*PE
 
-	parameter  SIGNED = 1,	// signed inputs
-	parameter  FPARG  = 0,	// floating-point inputs: [sign] | exponent | mantissa
-	parameter  BIAS   = 0,	// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
+	parameter  SIGNED = $SIGNED$,	// signed inputs
+	parameter  FPARG  = 0,			// floating-point inputs: [sign] | exponent | mantissa
+	parameter  BIAS   = $BIAS$,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
-	parameter  USE_AXILITE = 0,	// Implement AXI-Lite for threshold read/write
+	parameter  USE_AXILITE = $USE_AXILITE$,	// Implement AXI-Lite for threshold read/write
 
-	parameter  CF = C/PE,	// Channel Fold
-	parameter  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
-	parameter  O_BITS = $clog2(2**N+BIAS)
+	parameter  O_BITS = $O_BITS$
 )(
 	// Global Control
 	(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF s_axilite:s_axis:m_axis, ASSOCIATED_RESET ap_rst_n" *)
@@ -56,9 +54,9 @@ module thresholding_axi_tpl_outer #(
 
 	//- AXI Lite ------------------------
 	// Writing
-	input                  s_axilite_AWVALID,
-	output                 s_axilite_AWREADY,
-	input [ADDR_BITS-1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
+	input   s_axilite_AWVALID,
+	output  s_axilite_AWREADY,
+	input [$clog2(C/PE) + $clog2(PE) + N + 1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
 
 	input         s_axilite_WVALID,
 	output        s_axilite_WREADY,
@@ -70,9 +68,9 @@ module thresholding_axi_tpl_outer #(
 	output [1:0]  s_axilite_BRESP,
 
 	// Reading
-	input                  s_axilite_ARVALID,
-	output                 s_axilite_ARREADY,
-	input [ADDR_BITS-1:0]  s_axilite_ARADDR,
+	input   s_axilite_ARVALID,
+	output  s_axilite_ARREADY,
+	input [$clog2(C/PE) + $clog2(PE) + N + 1:0]  s_axilite_ARADDR,
 
 	output         s_axilite_RVALID,
 	input          s_axilite_RREADY,

From d9db2574f73b1fc7e88c3fe46d4d2d853b71f5b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Mon, 20 Nov 2023 08:06:10 +0000
Subject: [PATCH 102/111] Replicate correct O_BITS computation for negative
 BIASes in inner wrapper.

---
 finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
index f52d8d6a31..34a2d46706 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
@@ -46,7 +46,9 @@ module thresholding_axi_tpl_inner #(
 
 	localparam int unsigned  CF = C/PE,	// Channel Fold
 	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
-	localparam int unsigned  O_BITS = $clog2(2**N+BIAS)
+	localparam int unsigned  O_BITS = BIAS >= 0?
+		/* unsigned */ $clog2(2**N+BIAS) :
+		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
 )(
 	// Global Control
 	input	ap_clk,

From 073844ac2de17d84d548e0b71527d71861f67c54 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Mon, 20 Nov 2023 18:40:10 +0000
Subject: [PATCH 103/111] Correcting wrong unsigned interpretation of BIAS in
 inner wrapper.

---
 finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
index 34a2d46706..ddda5a88ed 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
@@ -39,7 +39,7 @@ module thresholding_axi_tpl_inner #(
 
 	int unsigned  SIGNED,	// signed inputs
 	int unsigned  FPARG,	// floating-point inputs: [sign] | exponent | mantissa
-	int unsigned  BIAS,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
+	int  BIAS,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
 	logic [K-1:0]  THRESHOLDS[C][2**N-1] = $THRESHOLDS$,
 	bit  USE_AXILITE,	// Implement AXI-Lite for threshold read/write

From 98184ac959b9a901bba07ee6d9f2252c0cb51ba8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Mon, 20 Nov 2023 19:01:59 +0000
Subject: [PATCH 104/111] Working around an LRM ambiguity when also having
 assignments in initial blocks.

---
 finn-rtllib/thresholding/hdl/thresholding.sv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 56038061c2..6ecccbe7b6 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -197,7 +197,7 @@ module thresholding #(
 					initial begin
 						Thresh = THRESHOLDS[pe][2**SN-1];
 					end
-					always_ff @(posedge clk) begin
+					always @(posedge clk) begin
 						if(we)  Thresh <= p.val;
 					end
 				end
@@ -211,7 +211,7 @@ module thresholding #(
 						end
 					end
 					uwire [$clog2(CF)+stage-1:0]  addr = p.ptr[$clog2(CF)+N-1:SN+1];
-					always_ff @(posedge clk) begin
+					always @(posedge clk) begin
 						if(we)  Threshs[addr] <= p.val;
 						Thresh <= Threshs[addr];
 					end

From 528184d0b34f4b64a21aa92570e8768ed0ec4507 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 21 Nov 2023 08:01:55 +0000
Subject: [PATCH 105/111] Attempt to mitigate long elaboration times by more
 explicit configuration masking.

---
 finn-rtllib/thresholding/hdl/thresholding.sv  | 66 +++++++++++------
 .../thresholding/hdl/thresholding_axi.sv      |  2 +-
 .../thresholding/sim/thresholding_axi_tb.sv   | 72 ++++++++++---------
 .../thresholding/sim/thresholding_tb.sv       |  2 +-
 4 files changed, 84 insertions(+), 58 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 6ecccbe7b6..c56e2a994e 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -63,6 +63,7 @@ module thresholding #(
 
 	// Initial Thresholds (per channel)
 	logic [K-1:0]  THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } },
+	bit  USE_CONFIG = 1,
 
 	localparam int unsigned  CF = C/PE,  // Channel fold
 	localparam int unsigned  O_BITS = BIAS >= 0?
@@ -132,7 +133,7 @@ module thresholding #(
 		always_ff @(posedge clk) begin
 			if(rst)  GuardSem <= N+2;
 			else begin
-				automatic logic  dec = !cfg_en && !th_full && ivld;
+				automatic logic  dec = !(USE_CONFIG && cfg_en) && !th_full && ivld;
 				automatic logic  inc = ovld && ordy;
 				GuardSem <= GuardSem + (inc == dec? 0 : inc? 1 : -1);
 			end
@@ -143,7 +144,7 @@ module thresholding #(
 		if(PE == 1)  assign  cfg_sel[0] = 1;
 		else begin
 			for(genvar  pe = 0; pe < PE; pe++) begin
-				assign	cfg_sel[pe] = cfg_en && (cfg_a[N+:$clog2(PE)] == pe);
+				assign	cfg_sel[pe] = USE_CONFIG && cfg_en && (cfg_a[N+:$clog2(PE)] == pe);
 			end
 		end
 
@@ -158,26 +159,26 @@ module thresholding #(
 					CnlCnt <= 0;
 					CnlLst <= 0;
 				end
-				else if(!cfg_en && !th_full && ivld) begin
+				else if(!(USE_CONFIG && cfg_en) && !th_full && ivld) begin
 					CnlCnt <= CnlCnt + (CnlLst? 1-CF : 1);
 					CnlLst <= CnlCnt == CF-2;
 				end
 			end
 
-			assign  iptr[N+:$clog2(CF)] = cfg_en? cfg_a[N+$clog2(PE)+:$clog2(CF)] : CnlCnt;
+			assign  iptr[N+:$clog2(CF)] = USE_CONFIG && cfg_en? cfg_a[N+$clog2(PE)+:$clog2(CF)] : CnlCnt;
 		end
 
 		for(genvar  pe = 0; pe < PE; pe++) begin
 			assign	pipe[pe][0] = '{
-				op:  cfg_en?
+				op:  USE_CONFIG && cfg_en?
 					(!cfg_sel[pe]? NOP : cfg_we? WR : RB) :
 					(ivld && !th_full? TH : NOP),
 				ptr: iptr,
-				val: !cfg_en? idat[pe] : cfg_we? cfg_d : 0
+				val: !(USE_CONFIG && cfg_en)? idat[pe] : cfg_we? cfg_d : 0
 			};
 		end
 
-		assign	irdy = !cfg_en && !th_full;
+		assign	irdy = !(USE_CONFIG && cfg_en) && !th_full;
 	end : blkFeed
 
 	//-----------------------------------------------------------------------
@@ -191,17 +192,10 @@ module thresholding #(
 
 			// Threshold Memory
 			val_t  Thresh;	// Read-out register
-			if(1) begin : blkThreshMem
-				uwire  we = (p.op ==? WR) && cs;
-				if((CF == 1) && (stage == 0)) begin
-					initial begin
-						Thresh = THRESHOLDS[pe][2**SN-1];
-					end
-					always @(posedge clk) begin
-						if(we)  Thresh <= p.val;
-					end
-				end
-				else begin
+			if(1) begin : blkThresh
+
+				uwire val_t  threshs[CF * 2**stage];
+				if(USE_CONFIG) begin : genThreshMem
 					val_t  Threshs[CF * 2**stage];
 					initial begin
 						for(int unsigned  c = 0; c < CF; c++) begin
@@ -210,13 +204,41 @@ module thresholding #(
 							end
 						end
 					end
+
+					uwire  we = (p.op ==? WR) && cs;
+					if((CF == 1) && (stage == 0)) begin
+						always @(posedge clk) begin
+							if(we)  Threshs[0] <= p.val;
+						end
+					end
+					else begin
+						uwire [$clog2(CF)+stage-1:0]  addr = p.ptr[$clog2(CF)+N-1:SN+1];
+						always @(posedge clk) begin
+							if(we)  Threshs[addr] <= p.val;
+						end
+					end
+
+					assign	threshs = Threshs;
+				end : genThreshMem
+				else begin : genThreshCst
+					for(genvar  c = 0; c < CF; c++) begin
+						for(genvar  i = 0; i < 2**stage; i++) begin
+							assign	threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1];
+						end
+					end
+				end : genThreshCst
+
+				if((CF == 1) && (stage == 0)) begin
+					assign	Thresh = threshs[0];
+				end
+				else begin
 					uwire [$clog2(CF)+stage-1:0]  addr = p.ptr[$clog2(CF)+N-1:SN+1];
-					always @(posedge clk) begin
-						if(we)  Threshs[addr] <= p.val;
-						Thresh <= Threshs[addr];
+					always_ff @(posedge clk) begin
+						Thresh <= threshs[addr];
 					end
 				end
-			end : blkThreshMem
+
+			end : blkThresh
 
 			// Pipeline State
 			pipe_t  P = '{ op: NOP, default: 'x };
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 20bdff6d25..67c2213dfb 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -135,7 +135,7 @@ module thresholding_axi #(
 	thresholding #(
 		.N(N), .K(K), .C(C), .PE(PE),
 		.SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS),
-		.THRESHOLDS(THRESHOLDS)
+		.THRESHOLDS(THRESHOLDS), .USE_CONFIG(USE_AXILITE)
 	) impl (
 		.clk(ap_clk), .rst(!ap_rst_n),
 
diff --git a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
index 200d4d5999..926c318adc 100644
--- a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
+++ b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
@@ -57,6 +57,7 @@ module thresholding_axi_tb #(
 	localparam int unsigned C_BITS = C < 2? 1 : $clog2(C);
 
 	localparam int unsigned MST_STRM_WROUNDS = 503;
+	localparam bit  DYNAMIC_CONFIG = 0;
 
 	typedef int unsigned  threshs_t[C][2**N-1];
 	function threshs_t init_thresholds();
@@ -110,7 +111,8 @@ module thresholding_axi_tb #(
 	uwire  ovld;
 	uwire [PE-1:0][N-1:0]  odat;
 
-	thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0)) dut (
+	localparam threshs_t  THRESHS_STATIC = DYNAMIC_CONFIG? '{ default: '{ default: 'x } } : THRESHS;
+	thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0), .THRESHOLDS(THRESHS_STATIC), .USE_AXILITE(1)) dut (
 		.ap_clk(clk), .ap_rst_n(!rst),
 
 		// Configuration
@@ -158,42 +160,44 @@ module thresholding_axi_tb #(
 
 		@(posedge clk iff !rst);
 
-		// Threshold Configuratin
-		for(int unsigned  c = 0; c < C; c+=PE) begin
-			automatic addr_t  addr = 0;
-			if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = c/PE;
-			for(int unsigned  pe = 0; pe < PE; pe++) begin
-				if(PE > 1)  addr[N+:$clog2(PE)] = pe;
-				for(int unsigned  t = 0; t < 2**N-1; t++) begin
-					addr[0+:N] = t;
-					fork
-						begin
-							s_axilite_AWVALID <= 1;
-							s_axilite_AWADDR  <= { addr, 2'b00 };
-							@(posedge clk iff s_axilite_AWREADY);
-							s_axilite_AWVALID <= 0;
-							s_axilite_AWADDR  <= 'x;
-						end
-						begin
-							s_axilite_WVALID <= 1;
-							s_axilite_WDATA  <= THRESHS[c+pe][t];
-							@(posedge clk iff s_axilite_WREADY);
-							s_axilite_WVALID <= 0;
-							s_axilite_WDATA  <= 'x;
-						end
-						begin
-							s_axilite_BREADY <= 1;
-							@(posedge clk iff s_axilite_BVALID);
-							assert(s_axilite_BRESP == '0) else begin
-								$error("Error on parameter write.");
-								$stop;
+		// Threshold Configuration
+		if(DYNAMIC_CONFIG) begin : blkConfig
+			for(int unsigned  c = 0; c < C; c+=PE) begin
+				automatic addr_t  addr = 0;
+				if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = c/PE;
+				for(int unsigned  pe = 0; pe < PE; pe++) begin
+					if(PE > 1)  addr[N+:$clog2(PE)] = pe;
+					for(int unsigned  t = 0; t < 2**N-1; t++) begin
+						addr[0+:N] = t;
+						fork
+							begin
+								s_axilite_AWVALID <= 1;
+								s_axilite_AWADDR  <= { addr, 2'b00 };
+								@(posedge clk iff s_axilite_AWREADY);
+								s_axilite_AWVALID <= 0;
+								s_axilite_AWADDR  <= 'x;
 							end
-							s_axilite_BREADY <= 0;
-						end
-					join
+							begin
+								s_axilite_WVALID <= 1;
+								s_axilite_WDATA  <= THRESHS[c+pe][t];
+								@(posedge clk iff s_axilite_WREADY);
+								s_axilite_WVALID <= 0;
+								s_axilite_WDATA  <= 'x;
+							end
+							begin
+								s_axilite_BREADY <= 1;
+								@(posedge clk iff s_axilite_BVALID);
+								assert(s_axilite_BRESP == '0) else begin
+									$error("Error on parameter write.");
+									$stop;
+								end
+								s_axilite_BREADY <= 0;
+							end
+						join
+					end
 				end
 			end
-		end
+		end : blkConfig
 
 		fork
 			// Intermittent configuration readback
diff --git a/finn-rtllib/thresholding/sim/thresholding_tb.sv b/finn-rtllib/thresholding/sim/thresholding_tb.sv
index 90dfba1022..20f3879422 100644
--- a/finn-rtllib/thresholding/sim/thresholding_tb.sv
+++ b/finn-rtllib/thresholding/sim/thresholding_tb.sv
@@ -85,7 +85,7 @@ module thresholding_tb #(
 		uwire  ovld;
 		uwire [PE-1:0][N-1:0]  odat;
 
-		thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG)) dut (
+		thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .USE_CONFIG(1)) dut (
 			.clk, .rst,
 
 			// Configuration

From 8fb250c9f6e340227608b0b455d6d3ce66376c4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Thu, 23 Nov 2023 18:45:51 +0000
Subject: [PATCH 106/111] Switching to threshold initialization from hex data
 files.

---
 finn-rtllib/thresholding/hdl/thresholding.sv  | 33 +++------
 .../thresholding/hdl/thresholding_axi.sv      | 18 +++--
 .../hdl/thresholding_axi_tpl_inner.sv         | 20 +++---
 .../hdl/thresholding_axi_tpl_outer.v          |  2 +
 finn-rtllib/thresholding/sim/thresh_gen.sv    | 45 ++++++++++++
 .../thresholding/sim/thresholding_axi_tb.sv   | 70 +++++++++----------
 6 files changed, 113 insertions(+), 75 deletions(-)
 create mode 100644 finn-rtllib/thresholding/sim/thresh_gen.sv

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index c56e2a994e..ff3d4172ab 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -61,8 +61,8 @@ module thresholding #(
 	bit  FPARG  = 0,  // floating-point inputs: [sign] | exponent | mantissa
 	int  BIAS   = 0,  // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
-	// Initial Thresholds (per channel)
-	logic [K-1:0]  THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } },
+	// Initial Thresholds
+	parameter  THRESHOLDS_PATH = "",
 	bit  USE_CONFIG = 1,
 
 	localparam int unsigned  CF = C/PE,  // Channel fold
@@ -194,17 +194,13 @@ module thresholding #(
 			val_t  Thresh;	// Read-out register
 			if(1) begin : blkThresh
 
-				uwire val_t  threshs[CF * 2**stage];
-				if(USE_CONFIG) begin : genThreshMem
-					val_t  Threshs[CF * 2**stage];
-					initial begin
-						for(int unsigned  c = 0; c < CF; c++) begin
-							for(int unsigned  i = 0; i < 2**stage; i++) begin
-								Threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1];
-							end
-						end
-					end
+				val_t  Threshs[CF * 2**stage];
+				if(THRESHOLDS_PATH != "") begin
+					localparam  FILE = $sformatf("%s/threshs_%0d_%0d.dat", THRESHOLDS_PATH, pe, stage);
+					initial  $readmemh(FILE, Threshs);
+				end
 
+				if(USE_CONFIG) begin : genThreshMem
 					uwire  we = (p.op ==? WR) && cs;
 					if((CF == 1) && (stage == 0)) begin
 						always @(posedge clk) begin
@@ -217,24 +213,15 @@ module thresholding #(
 							if(we)  Threshs[addr] <= p.val;
 						end
 					end
-
-					assign	threshs = Threshs;
 				end : genThreshMem
-				else begin : genThreshCst
-					for(genvar  c = 0; c < CF; c++) begin
-						for(genvar  i = 0; i < 2**stage; i++) begin
-							assign	threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1];
-						end
-					end
-				end : genThreshCst
 
 				if((CF == 1) && (stage == 0)) begin
-					assign	Thresh = threshs[0];
+					assign	Thresh = Threshs[0];
 				end
 				else begin
 					uwire [$clog2(CF)+stage-1:0]  addr = p.ptr[$clog2(CF)+N-1:SN+1];
 					always_ff @(posedge clk) begin
-						Thresh <= threshs[addr];
+						Thresh <= Threshs[addr];
 					end
 				end
 
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 67c2213dfb..69617a20d9 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -48,8 +48,8 @@ module thresholding_axi #(
 	bit  FPARG  = 0,	// floating-point inputs: [sign] | exponent | mantissa
 	int  BIAS   = 0,	// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
-	// Initial Thresholds (per channel)
-	logic [K-1:0]  THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } },
+	// Initial Thresholds
+	parameter  THRESHOLDS_PATH = "",
 
 	bit  USE_AXILITE,	// Implement AXI-Lite for threshold read/write
 
@@ -103,12 +103,13 @@ module thresholding_axi #(
 	// AXI-lite Configuration Interface
 	uwire  cfg_en;
 	uwire  cfg_we;
-	uwire [ADDR_BITS-1:0]  cfg_a;
+	uwire [ADDR_BITS-3:0]  cfg_a;
 	uwire [K        -1:0]  cfg_d;
 	uwire  cfg_rack;
 	uwire [K        -1:0]  cfg_q;
 
 	if(USE_AXILITE) begin
+		uwire [ADDR_BITS-1:0]  cfg_a0;
 		axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi (
 			.aclk(ap_clk), .aresetn(ap_rst_n),
 
@@ -119,9 +120,16 @@ module thresholding_axi #(
 			.arready(s_axilite_ARREADY), .arvalid(s_axilite_ARVALID), .araddr(s_axilite_ARADDR), .arprot('x),
 			.rready(s_axilite_RREADY),   .rvalid(s_axilite_RVALID),   .rresp(s_axilite_RRESP),   .rdata(s_axilite_RDATA),
 
-			.ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a), .ip_wdata(cfg_d),
+			.ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a0), .ip_wdata(cfg_d),
 			.ip_rack(cfg_rack), .ip_rdata(cfg_q)
 		);
+		assign	cfg_a = cfg_a0[ADDR_BITS-3:0];
+		always_ff @(posedge ap_clk) begin	// Simulation check: the two cfg_a0 bits not forwarded into cfg_a must be zero
+			assert(!ap_rst_n || !cfg_en || (cfg_a0[ADDR_BITS-2+:2] === 2'h0)) else begin	// 2-bit literal matches the 2-bit slice; skipped in reset or when cfg_en is low
+				$error("%m: Spurious high address bits.");
+				$stop;
+			end
+		end
 	end
 	else begin
 		assign	cfg_en =  0;
@@ -135,7 +143,7 @@ module thresholding_axi #(
 	thresholding #(
 		.N(N), .K(K), .C(C), .PE(PE),
 		.SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS),
-		.THRESHOLDS(THRESHOLDS), .USE_CONFIG(USE_AXILITE)
+		.THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_CONFIG(USE_AXILITE)
 	) impl (
 		.clk(ap_clk), .rst(!ap_rst_n),
 
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
index ddda5a88ed..b1350a9f31 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
@@ -32,17 +32,17 @@
  */
 
 module thresholding_axi_tpl_inner #(
-	int unsigned  N,	// output precision
-	int unsigned  K,	// input/threshold precision
-	int unsigned  C,	// Channels
-	int unsigned  PE,	// Processing Parallelism, requires C = k*PE
+	int unsigned  N = 4,	// output precision
+	int unsigned  K = 9,	// input/threshold precision
+	int unsigned  C = 6,	// Channels
+	int unsigned  PE = 2,	// Processing Parallelism, requires C = k*PE
 
-	int unsigned  SIGNED,	// signed inputs
-	int unsigned  FPARG,	// floating-point inputs: [sign] | exponent | mantissa
-	int  BIAS,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
+	int unsigned  SIGNED = 1,	// signed inputs
+	int unsigned  FPARG = 0,	// floating-point inputs: [sign] | exponent | mantissa
+	int  BIAS = 0,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
-	logic [K-1:0]  THRESHOLDS[C][2**N-1] = $THRESHOLDS$,
-	bit  USE_AXILITE,	// Implement AXI-Lite for threshold read/write
+	parameter  THRESHOLDS_PATH = "../../../data",
+	bit  USE_AXILITE = 1,	// Implement AXI-Lite for threshold read/write
 
 	localparam int unsigned  CF = C/PE,	// Channel Fold
 	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
@@ -95,7 +95,7 @@ module thresholding_axi_tpl_inner #(
 		.SIGNED(SIGNED),
 		.FPARG(FPARG),
 		.BIAS(BIAS),
-		.THRESHOLDS(THRESHOLDS),
+		.THRESHOLDS_PATH(THRESHOLDS_PATH),
 		.USE_AXILITE(USE_AXILITE)
 	) core (
 		.ap_clk, .ap_rst_n,
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
index 3521987b66..13c8189f0e 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
@@ -41,6 +41,7 @@ module thresholding_axi_tpl_outer #(
 	parameter  FPARG  = 0,			// floating-point inputs: [sign] | exponent | mantissa
 	parameter  BIAS   = $BIAS$,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
 
+	parameter  THRESHOLDS_PATH = $THRESHOLDS_PATH$,	// Directory with initial threshold data
 	parameter  USE_AXILITE = $USE_AXILITE$,	// Implement AXI-Lite for threshold read/write
 
 	parameter  O_BITS = $O_BITS$
@@ -93,6 +94,7 @@ module thresholding_axi_tpl_outer #(
 		.SIGNED(SIGNED),
 		.FPARG(FPARG),
 		.BIAS(BIAS),
+		.THRESHOLDS_PATH(THRESHOLDS_PATH),
 		.USE_AXILITE(USE_AXILITE)
 	) core (
 		.ap_clk(ap_clk), .ap_rst_n(ap_rst_n),
diff --git a/finn-rtllib/thresholding/sim/thresh_gen.sv b/finn-rtllib/thresholding/sim/thresh_gen.sv
new file mode 100644
index 0000000000..a8a18be691
--- /dev/null
+++ b/finn-rtllib/thresholding/sim/thresh_gen.sv
@@ -0,0 +1,45 @@
+module thresh_gen;	// Writes one threshs_<pe>_<stage>.dat hex file per (pe, stage) pair via $writememh
+	localparam int unsigned  K = 9;	// bit width of one threshold value (see thresh_t)
+	localparam int unsigned  N = 4;	// number of stages; each channel has 2**N-1 thresholds
+	localparam int unsigned  C = 6;	// number of channels (first dimension of THRESHOLDS)
+
+	typedef logic [K-1:0]  thresh_t;
+	localparam thresh_t  THRESHOLDS[C][2**N-1] = '{	// example table: entry [c][t] is 'h<c><t>
+		'{ 'h00, 'h01, 'h02, 'h03, 'h04, 'h05, 'h06, 'h07, 'h08, 'h09, 'h0a, 'h0b, 'h0c, 'h0d, 'h0e },
+		'{ 'h10, 'h11, 'h12, 'h13, 'h14, 'h15, 'h16, 'h17, 'h18, 'h19, 'h1a, 'h1b, 'h1c, 'h1d, 'h1e },
+		'{ 'h20, 'h21, 'h22, 'h23, 'h24, 'h25, 'h26, 'h27, 'h28, 'h29, 'h2a, 'h2b, 'h2c, 'h2d, 'h2e },
+		'{ 'h30, 'h31, 'h32, 'h33, 'h34, 'h35, 'h36, 'h37, 'h38, 'h39, 'h3a, 'h3b, 'h3c, 'h3d, 'h3e },
+		'{ 'h40, 'h41, 'h42, 'h43, 'h44, 'h45, 'h46, 'h47, 'h48, 'h49, 'h4a, 'h4b, 'h4c, 'h4d, 'h4e },
+		'{ 'h50, 'h51, 'h52, 'h53, 'h54, 'h55, 'h56, 'h57, 'h58, 'h59, 'h5a, 'h5b, 'h5c, 'h5d, 'h5e }
+	};
+	localparam  THRESHOLDS_PATH = ".";	// directory the generated files are written to
+
+	localparam int unsigned  PE = 2;	// number of pe indices that get their own files
+	localparam int unsigned  CF = C/PE;	// channel fold: channels covered by each pe index
+
+	for(genvar  stage = 0; stage < N; stage++) begin	// one file set per stage
+		localparam int unsigned  SN = N-1-stage;	// 2**SN-1 is the lowest THRESHOLDS index picked for this stage
+		for(genvar  pe = 0; pe < PE; pe++) begin
+			initial begin
+				automatic string  file = $sformatf("%s/threshs_%0d_%0d.dat", THRESHOLDS_PATH, pe, stage);
+
+				automatic thresh_t  threshs[CF * 2**stage];	// 2**stage entries for each of the CF fold slots
+				for(int unsigned  c = 0; c < CF; c++) begin
+					for(int unsigned  i = 0; i < 2**stage; i++) begin
+						threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1];	// fold slot c holds channel c*PE + pe
+					end
+				end
+
+				$writememh(file, threshs);
+			end
+		end
+	end
+
+    // End the run at 1ns; the file-writing initial blocks above contain no delays
+	initial begin
+		#1ns;
+		$display("Generation done.");
+		$finish;
+	end
+
+endmodule : thresh_gen
diff --git a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
index 926c318adc..918f539d15 100644
--- a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
+++ b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
@@ -57,7 +57,6 @@ module thresholding_axi_tb #(
 	localparam int unsigned C_BITS = C < 2? 1 : $clog2(C);
 
 	localparam int unsigned MST_STRM_WROUNDS = 503;
-	localparam bit  DYNAMIC_CONFIG = 0;
 
 	typedef int unsigned  threshs_t[C][2**N-1];
 	function threshs_t init_thresholds();
@@ -111,8 +110,7 @@ module thresholding_axi_tb #(
 	uwire  ovld;
 	uwire [PE-1:0][N-1:0]  odat;
 
-	localparam threshs_t  THRESHS_STATIC = DYNAMIC_CONFIG? '{ default: '{ default: 'x } } : THRESHS;
-	thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0), .THRESHOLDS(THRESHS_STATIC), .USE_AXILITE(1)) dut (
+	thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0), .USE_AXILITE(1)) dut (
 		.ap_clk(clk), .ap_rst_n(!rst),
 
 		// Configuration
@@ -161,43 +159,41 @@ module thresholding_axi_tb #(
 		@(posedge clk iff !rst);
 
 		// Threshold Configuration
-		if(DYNAMIC_CONFIG) begin : blkConfig
-			for(int unsigned  c = 0; c < C; c+=PE) begin
-				automatic addr_t  addr = 0;
-				if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = c/PE;
-				for(int unsigned  pe = 0; pe < PE; pe++) begin
-					if(PE > 1)  addr[N+:$clog2(PE)] = pe;
-					for(int unsigned  t = 0; t < 2**N-1; t++) begin
-						addr[0+:N] = t;
-						fork
-							begin
-								s_axilite_AWVALID <= 1;
-								s_axilite_AWADDR  <= { addr, 2'b00 };
-								@(posedge clk iff s_axilite_AWREADY);
-								s_axilite_AWVALID <= 0;
-								s_axilite_AWADDR  <= 'x;
-							end
-							begin
-								s_axilite_WVALID <= 1;
-								s_axilite_WDATA  <= THRESHS[c+pe][t];
-								@(posedge clk iff s_axilite_WREADY);
-								s_axilite_WVALID <= 0;
-								s_axilite_WDATA  <= 'x;
-							end
-							begin
-								s_axilite_BREADY <= 1;
-								@(posedge clk iff s_axilite_BVALID);
-								assert(s_axilite_BRESP == '0) else begin
-									$error("Error on parameter write.");
-									$stop;
-								end
-								s_axilite_BREADY <= 0;
+		for(int unsigned  c = 0; c < C; c+=PE) begin
+			automatic addr_t  addr = 0;
+			if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = c/PE;
+			for(int unsigned  pe = 0; pe < PE; pe++) begin
+				if(PE > 1)  addr[N+:$clog2(PE)] = pe;
+				for(int unsigned  t = 0; t < 2**N-1; t++) begin
+					addr[0+:N] = t;
+					fork
+						begin
+							s_axilite_AWVALID <= 1;
+							s_axilite_AWADDR  <= { addr, 2'b00 };
+							@(posedge clk iff s_axilite_AWREADY);
+							s_axilite_AWVALID <= 0;
+							s_axilite_AWADDR  <= 'x;
+						end
+						begin
+							s_axilite_WVALID <= 1;
+							s_axilite_WDATA  <= THRESHS[c+pe][t];
+							@(posedge clk iff s_axilite_WREADY);
+							s_axilite_WVALID <= 0;
+							s_axilite_WDATA  <= 'x;
+						end
+						begin
+							s_axilite_BREADY <= 1;
+							@(posedge clk iff s_axilite_BVALID);
+							assert(s_axilite_BRESP == '0) else begin
+								$error("Error on parameter write.");
+								$stop;
 							end
-						join
-					end
+							s_axilite_BREADY <= 0;
+						end
+					join
 				end
 			end
-		end : blkConfig
+		end
 
 		fork
 			// Intermittent configuration readback

From 9de06be72165d7b2a8bc5d87ca2e2ad0cfca1fd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Tue, 28 Nov 2023 07:37:21 +0000
Subject: [PATCH 107/111] Adding parameters to request local memories of a
 given depth to be mapped to BRAM or URAM.

---
 finn-rtllib/thresholding/hdl/thresholding.sv          | 11 ++++++++++-
 finn-rtllib/thresholding/hdl/thresholding_axi.sv      |  7 ++++++-
 .../thresholding/hdl/thresholding_axi_tpl_inner.sv    |  8 +++++++-
 .../thresholding/hdl/thresholding_axi_tpl_outer.v     |  8 +++++++-
 4 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index ff3d4172ab..ff801ac7b9 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -65,6 +65,10 @@ module thresholding #(
 	parameter  THRESHOLDS_PATH = "",
 	bit  USE_CONFIG = 1,
 
+	// Force Use of On-Chip Memory Blocks
+	int unsigned  DEPTH_TRIGGER_URAM = 0,	// if non-zero, local mems of this depth or more go into URAM (prio)
+	int unsigned  DEPTH_TRIGGER_BRAM = 0,	// if non-zero, local mems of this depth or more go into BRAM
+
 	localparam int unsigned  CF = C/PE,  // Channel fold
 	localparam int unsigned  O_BITS = BIAS >= 0?
 		/* unsigned */ $clog2(2**N+BIAS) :
@@ -193,8 +197,13 @@ module thresholding #(
 			// Threshold Memory
 			val_t  Thresh;	// Read-out register
 			if(1) begin : blkThresh
+				localparam int unsigned  DEPTH = CF * 2**stage;
+				localparam  RAM_STYLE =
+					DEPTH_TRIGGER_URAM && (DEPTH >= DEPTH_TRIGGER_URAM)? "ultra" :
+					DEPTH_TRIGGER_BRAM && (DEPTH >= DEPTH_TRIGGER_BRAM)? "block" : "auto";
 
-				val_t  Threshs[CF * 2**stage];
+				(* RAM_STYLE = RAM_STYLE *)
+				val_t  Threshs[DEPTH];
 				if(THRESHOLDS_PATH != "") begin
 					localparam  FILE = $sformatf("%s/threshs_%0d_%0d.dat", THRESHOLDS_PATH, pe, stage);
 					initial  $readmemh(FILE, Threshs);
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 69617a20d9..1254d71750 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -53,6 +53,10 @@ module thresholding_axi #(
 
 	bit  USE_AXILITE,	// Implement AXI-Lite for threshold read/write
 
+	// Force Use of On-Chip Memory Blocks
+	int unsigned  DEPTH_TRIGGER_URAM = 0,	// if non-zero, local mems of this depth or more go into URAM (prio)
+	int unsigned  DEPTH_TRIGGER_BRAM = 0,	// if non-zero, local mems of this depth or more go into BRAM
+
 	localparam int unsigned  CF = C/PE,	// Channel Fold
 	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
 	localparam int unsigned  O_BITS = BIAS >= 0?
@@ -143,7 +147,8 @@ module thresholding_axi #(
 	thresholding #(
 		.N(N), .K(K), .C(C), .PE(PE),
 		.SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS),
-		.THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_CONFIG(USE_AXILITE)
+		.THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_CONFIG(USE_AXILITE),
+		.DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM)
 	) impl (
 		.clk(ap_clk), .rst(!ap_rst_n),
 
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
index b1350a9f31..d1c5333ebf 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
@@ -44,6 +44,10 @@ module thresholding_axi_tpl_inner #(
 	parameter  THRESHOLDS_PATH = "../../../data",
 	bit  USE_AXILITE = 1,	// Implement AXI-Lite for threshold read/write
 
+	// Force Use of On-Chip Memory Blocks
+	int unsigned  DEPTH_TRIGGER_URAM = 0,	// if non-zero, local mems of this depth or more go into URAM (prio)
+	int unsigned  DEPTH_TRIGGER_BRAM = 0,	// if non-zero, local mems of this depth or more go into BRAM
+
 	localparam int unsigned  CF = C/PE,	// Channel Fold
 	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
 	localparam int unsigned  O_BITS = BIAS >= 0?
@@ -96,7 +100,9 @@ module thresholding_axi_tpl_inner #(
 		.FPARG(FPARG),
 		.BIAS(BIAS),
 		.THRESHOLDS_PATH(THRESHOLDS_PATH),
-		.USE_AXILITE(USE_AXILITE)
+		.USE_AXILITE(USE_AXILITE),
+		.DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM),
+		.DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM)
 	) core (
 		.ap_clk, .ap_rst_n,
 
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
index 13c8189f0e..ff2d0c3c74 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
@@ -44,6 +44,10 @@ module thresholding_axi_tpl_outer #(
 	parameter  THRESHOLDS_PATH = $THRESHOLDS_PATH$,	// Directory with initial threshold data
 	parameter  USE_AXILITE = $USE_AXILITE$,	// Implement AXI-Lite for threshold read/write
 
+	// Force Use of On-Chip Memory Blocks
+	parameter  DEPTH_TRIGGER_URAM = $DEPTH_TRIGGER_URAM$,	// if non-zero, local mems of this depth or more go into URAM (prio)
+	parameter  DEPTH_TRIGGER_BRAM = $DEPTH_TRIGGER_BRAM$,	// if non-zero, local mems of this depth or more go into BRAM
+
 	parameter  O_BITS = $O_BITS$
 )(
 	// Global Control
@@ -95,7 +99,9 @@ module thresholding_axi_tpl_outer #(
 		.FPARG(FPARG),
 		.BIAS(BIAS),
 		.THRESHOLDS_PATH(THRESHOLDS_PATH),
-		.USE_AXILITE(USE_AXILITE)
+		.USE_AXILITE(USE_AXILITE),
+		.DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM),
+		.DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM)
 	) core (
 		.ap_clk(ap_clk), .ap_rst_n(ap_rst_n),
 

From 038a58a80dddc47365a69d429123425f59f6f810 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Wed, 29 Nov 2023 09:40:05 +0000
Subject: [PATCH 108/111] Prevent BRAM use below specified trigger.

---
 finn-rtllib/thresholding/hdl/thresholding.sv | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index ff801ac7b9..8f862c7bf2 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -200,7 +200,9 @@ module thresholding #(
 				localparam int unsigned  DEPTH = CF * 2**stage;
 				localparam  RAM_STYLE =
 					DEPTH_TRIGGER_URAM && (DEPTH >= DEPTH_TRIGGER_URAM)? "ultra" :
-					DEPTH_TRIGGER_BRAM && (DEPTH >= DEPTH_TRIGGER_BRAM)? "block" : "auto";
+					DEPTH_TRIGGER_BRAM && (DEPTH >= DEPTH_TRIGGER_BRAM)? "block" :
+					// With a BRAM trigger defined, force distributed RAM for depths below it that are still large enough (>= 64) to tempt Vivado into BRAM.
+					DEPTH_TRIGGER_BRAM && (DEPTH >= 64)? "distributed" : "auto";
 
 				(* RAM_STYLE = RAM_STYLE *)
 				val_t  Threshs[DEPTH];

From f39187c2c8ee6952755fec3753d5107bfd1e48e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Fri, 1 Dec 2023 09:00:20 +0000
Subject: [PATCH 109/111] Fixing N vs. 2^N confusion in the module description.

---
 finn-rtllib/thresholding/hdl/thresholding.sv | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 8f862c7bf2..4e1de356c2 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -44,11 +44,11 @@
  *  accompanied by a channel selector.
  *
  *  Parameter Layout as seen on AXI-Lite (row by row):
- *            | Base               \   Offs  |   0    1    2  ...   N-2     N-1
- *   ---------+------------------------------+----------------------------------
- *    Chnl #0 |   0                          |  T_0  T_1  T_2 ... T_{N-2}    'x
- *    Chnl #1 |   N                          |  T_0  T_1  T_2 ... T_{N-2}    'x
- *    Chnl #c | ((c/PE)*$clog2(PE) + c%PE)*N |  T_0  T_1  T_2 ... T_{N-2}    'x
+ *            | Base                  \    Offs  |   0    1    2  ...   2^N-2   2^N-1
+ *   ---------+----------------------------------+------------------------------------
+ *    Chnl #0 |   0                              |  T_0  T_1  T_2 ... T_{2^N-2}  'x
+ *    Chnl #1 |   2^N                            |  T_0  T_1  T_2 ... T_{2^N-2}  'x
+ *    Chnl #c | ((c/PE)*2^$clog2(PE) + c%PE)*2^N |  T_0  T_1  T_2 ... T_{2^N-2}  'x
  *
  *****************************************************************************/
 module thresholding #(

From 7284d2c6bf0210dd76294acc34e1d7ff8378db87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Wed, 6 Dec 2023 11:02:25 +0000
Subject: [PATCH 110/111] Add deep pipelining option to thresholding
 implementation.

---
 finn-rtllib/thresholding/hdl/thresholding.sv  | 28 +++++++++++++++----
 .../thresholding/sim/thresholding_tb.sv       |  8 ++++--
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
index 4e1de356c2..75fbb61a4d 100644
--- a/finn-rtllib/thresholding/hdl/thresholding.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -68,6 +68,7 @@ module thresholding #(
 	// Force Use of On-Chip Memory Blocks
 	int unsigned  DEPTH_TRIGGER_URAM = 0,	// if non-zero, local mems of this depth or more go into URAM (prio)
 	int unsigned  DEPTH_TRIGGER_BRAM = 0,	// if non-zero, local mems of this depth or more go into BRAM
+	bit  DEEP_PIPELINE = 0,
 
 	localparam int unsigned  CF = C/PE,  // Channel fold
 	localparam int unsigned  O_BITS = BIAS >= 0?
@@ -128,14 +129,15 @@ module thresholding #(
 	//	- configuration always takes precedence
 	//	- number of pending thresholding ops capped to N+3
 	//	  across pipeline and output FIFO: pipe:N + A:1 + B:1 + 1
+	localparam int unsigned  MAX_PENDING = (DEEP_PIPELINE+1)*N + 3;
 	pipe_t  pipe[PE][N+1];
 	if(1) begin : blkFeed
 
 		// Thresholding Input Guard ensuring Output FIFO is never overrun
-		logic signed [$clog2(N+3):0]  GuardSem = N+2;	// N+2, N+1, ..., 0, -1
+		logic signed [$clog2(MAX_PENDING):0]  GuardSem = MAX_PENDING-1;	// MAX_PENDING-1, ..., 0, -1
 		uwire  th_full = GuardSem[$left(GuardSem)];
 		always_ff @(posedge clk) begin
-			if(rst)  GuardSem <= N+2;
+			if(rst)  GuardSem <= MAX_PENDING-1;
 			else begin
 				automatic logic  dec = !(USE_CONFIG && cfg_en) && !th_full && ivld;
 				automatic logic  inc = ovld && ordy;
@@ -268,13 +270,29 @@ module thresholding #(
 					endcase
 				end
 			end : blkSignedFloat
+
+			// Pipeline State Update
+			pipe_t  pp;
 			always_comb begin
-				automatic pipe_t  pp = P;
+				pp = P;
 				if(P.op !=? CFG)  pp.ptr[SN] = cmp;
 				if(Reval)         pp.val = Thresh;
-				pipe[pe][stage+1] = pp;
 			end
 
+			// Pipeline State Forward (potentially additional register)
+			pipe_t  pf;
+			if(!DEEP_PIPELINE)  assign  pf = pp;
+			else begin
+				pipe_t  Pf = '{ op: NOP, default: 'x };
+				always_ff @(posedge clk) begin
+					if(rst)  Pf <= '{ op: NOP, default: 'x };
+					else     Pf <= pp;
+				end
+				assign	pf = Pf;
+			end
+
+			assign	pipe[pe][stage+1] = pf;
+
 		end : genPE
 	end : genStages
 
@@ -295,7 +313,7 @@ module thresholding #(
 	//	- Depth of N + Output Reg to allow pipe to drain entirely under backpressure
 	//	- Typically mapped to an SRL shift register
 	if(1) begin : blkStreamOutput
-		localparam int unsigned  A_DEPTH = N+2;
+		localparam int unsigned  A_DEPTH = MAX_PENDING - 1;	// = (DEEP_PIPELINE+1)*N + 2
 		logic        [PE-1 : 0][N-1 : 0]  ADat[A_DEPTH];
 		logic signed [$clog2(A_DEPTH):0]  APtr = '1;	// -1, 0, 1, ..., A_DEPTH-1
 		uwire  avld = !APtr[$left(APtr)];
diff --git a/finn-rtllib/thresholding/sim/thresholding_tb.sv b/finn-rtllib/thresholding/sim/thresholding_tb.sv
index 20f3879422..e42145f10e 100644
--- a/finn-rtllib/thresholding/sim/thresholding_tb.sv
+++ b/finn-rtllib/thresholding/sim/thresholding_tb.sv
@@ -41,6 +41,8 @@ module thresholding_tb #(
 
 	localparam int unsigned  CF = C/PE	// Channel Fold
 );
+    localparam bit  DEEP_PIPELINE = 1;
+
 	localparam int unsigned  MST_STRM_WROUNDS = 507;
 	localparam bit  THROTTLED = 1;
 
@@ -85,7 +87,7 @@ module thresholding_tb #(
 		uwire  ovld;
 		uwire [PE-1:0][N-1:0]  odat;
 
-		thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .USE_CONFIG(1)) dut (
+		thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .USE_CONFIG(1), .DEEP_PIPELINE(DEEP_PIPELINE)) dut (
 			.clk, .rst,
 
 			// Configuration
@@ -165,7 +167,7 @@ module thresholding_tb #(
 					cfg_we <= 'x;
 					cfg_a  <= 'x;
 					@(posedge clk);
-					if(($urandom()%37) == 0) begin
+					if(($urandom()%41) == 0) begin
 						automatic addr_t  addr = $urandom()%(N-1);
 						if(PE > 1)  addr[N+:$clog2(PE)] = $urandom()%PE;
 						if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = $urandom()%CF;
@@ -194,7 +196,7 @@ module thresholding_tb #(
 				end
 			join_any
 			done <= 1;
-			repeat(N+6)  @(posedge clk);
+			repeat((DEEP_PIPELINE+1)*N+6)  @(posedge clk);
 
 			assert(QW.size() == 0) else begin
 				$error("[%0d] Missing %0d outputs.", i, QW.size());

From aa57255b7b0f81b731d5a8c77230dff15c5fb065 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= <thomas.preusser@xilinx.com>
Date: Wed, 6 Dec 2023 17:09:36 +0000
Subject: [PATCH 111/111] Removing the inner wrapper. Exposing the
 DEEP_PIPELINE option and adjusting to established naming in FINN.

---
 .../thresholding/hdl/thresholding_axi.sv      |   4 +-
 .../hdl/thresholding_axi_tpl_inner.sv         | 119 ------------------
 ...uter.v => thresholding_template_wrapper.v} |  28 +++--
 3 files changed, 18 insertions(+), 133 deletions(-)
 delete mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
 rename finn-rtllib/thresholding/hdl/{thresholding_axi_tpl_outer.v => thresholding_template_wrapper.v} (85%)

diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
index 1254d71750..1f235b9486 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -56,6 +56,7 @@ module thresholding_axi #(
 	// Force Use of On-Chip Memory Blocks
 	int unsigned  DEPTH_TRIGGER_URAM = 0,	// if non-zero, local mems of this depth or more go into URAM (prio)
 	int unsigned  DEPTH_TRIGGER_BRAM = 0,	// if non-zero, local mems of this depth or more go into BRAM
+	bit  DEEP_PIPELINE = 0,	// [bit] extra pipeline stages for easier timing closure
 
 	localparam int unsigned  CF = C/PE,	// Channel Fold
 	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
@@ -148,7 +149,8 @@ module thresholding_axi #(
 		.N(N), .K(K), .C(C), .PE(PE),
 		.SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS),
 		.THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_CONFIG(USE_AXILITE),
-		.DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM)
+		.DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM),
+		.DEEP_PIPELINE(DEEP_PIPELINE)
 	) impl (
 		.clk(ap_clk), .rst(!ap_rst_n),
 
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
deleted file mode 100644
index d1c5333ebf..0000000000
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv
+++ /dev/null
@@ -1,119 +0,0 @@
-/**
- * Copyright (c) 2023, Xilinx
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice, this
- *   list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * * Neither the name of FINN nor the names of its
- *   contributors may be used to endorse or promote products derived from
- *   this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * @author	Thomas B. Preußer <thomas.preusser@amd.com>
- * @brief	Verilog wrapper for IP packaging.
- */
-
-module thresholding_axi_tpl_inner #(
-	int unsigned  N = 4,	// output precision
-	int unsigned  K = 9,	// input/threshold precision
-	int unsigned  C = 6,	// Channels
-	int unsigned  PE = 2,	// Processing Parallelism, requires C = k*PE
-
-	int unsigned  SIGNED = 1,	// signed inputs
-	int unsigned  FPARG = 0,	// floating-point inputs: [sign] | exponent | mantissa
-	int  BIAS = 0,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
-
-	parameter  THRESHOLDS_PATH = "../../../data",
-	bit  USE_AXILITE = 1,	// Implement AXI-Lite for threshold read/write
-
-	// Force Use of On-Chip Memory Blocks
-	int unsigned  DEPTH_TRIGGER_URAM = 0,	// if non-zero, local mems of this depth or more go into URAM (prio)
-	int unsigned  DEPTH_TRIGGER_BRAM = 0,	// if non-zero, local mems of this depth or more go into BRAM
-
-	localparam int unsigned  CF = C/PE,	// Channel Fold
-	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
-	localparam int unsigned  O_BITS = BIAS >= 0?
-		/* unsigned */ $clog2(2**N+BIAS) :
-		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
-)(
-	// Global Control
-	input	ap_clk,
-	input	ap_rst_n,
-
-	//- AXI Lite ------------------------
-	// Writing
-	input                  s_axilite_AWVALID,
-	output                 s_axilite_AWREADY,
-	input [ADDR_BITS-1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
-
-	input         s_axilite_WVALID,
-	output        s_axilite_WREADY,
-	input [31:0]  s_axilite_WDATA,
-	input [ 3:0]  s_axilite_WSTRB,
-
-	output        s_axilite_BVALID,
-	input         s_axilite_BREADY,
-	output [1:0]  s_axilite_BRESP,
-
-	// Reading
-	input                  s_axilite_ARVALID,
-	output                 s_axilite_ARREADY,
-	input [ADDR_BITS-1:0]  s_axilite_ARADDR,
-
-	output         s_axilite_RVALID,
-	input          s_axilite_RREADY,
-	output [31:0]  s_axilite_RDATA,
-	output [ 1:0]  s_axilite_RRESP,
-
-	//- AXI Stream - Input --------------
-	output  s_axis_tready,
-	input   s_axis_tvalid,
-	input [((PE*K+7)/8)*8-1:0]  s_axis_tdata,
-
-	//- AXI Stream - Output -------------
-	input   m_axis_tready,
-	output  m_axis_tvalid,
-	output [((PE*O_BITS+7)/8)*8-1:0]  m_axis_tdata
-);
-
-	thresholding_axi #(
-		.N(N), .K(K), .C(C), .PE(PE),
-		.SIGNED(SIGNED),
-		.FPARG(FPARG),
-		.BIAS(BIAS),
-		.THRESHOLDS_PATH(THRESHOLDS_PATH),
-		.USE_AXILITE(USE_AXILITE),
-		.DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM),
-		.DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM)
-	) core (
-		.ap_clk, .ap_rst_n,
-
-		.s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR,
-		.s_axilite_WVALID, .s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB,
-		.s_axilite_BVALID, .s_axilite_BREADY, .s_axilite_BRESP,
-
-		.s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR,
-		.s_axilite_RVALID, .s_axilite_RREADY, .s_axilite_RDATA, .s_axilite_RRESP,
-		.s_axis_tready, .s_axis_tvalid, .s_axis_tdata,
-		.m_axis_tready, .m_axis_tvalid, .m_axis_tdata
-	);
-
-endmodule : thresholding_axi_tpl_inner
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v b/finn-rtllib/thresholding/hdl/thresholding_template_wrapper.v
similarity index 85%
rename from finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
rename to finn-rtllib/thresholding/hdl/thresholding_template_wrapper.v
index ff2d0c3c74..3f0b012ef1 100644
--- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v
+++ b/finn-rtllib/thresholding/hdl/thresholding_template_wrapper.v
@@ -31,7 +31,7 @@
  * @brief	Verilog wrapper for IP packaging.
  */
 
-module thresholding_axi_tpl_outer #(
+module thresholding_template_wrapper #(
 	parameter  N = $N$,	// output precision
 	parameter  K = $M$,	// input/threshold precision
 	parameter  C = $C$,	// Channels
@@ -47,11 +47,12 @@ module thresholding_axi_tpl_outer #(
 	// Force Use of On-Chip Memory Blocks
 	parameter  DEPTH_TRIGGER_URAM = $DEPTH_TRIGGER_URAM$,	// if non-zero, local mems of this depth or more go into URAM (prio)
 	parameter  DEPTH_TRIGGER_BRAM = $DEPTH_TRIGGER_BRAM$,	// if non-zero, local mems of this depth or more go into BRAM
+	parameter  DEEP_PIPELINE = $DEEP_PIPELINE$,	// [bit] extra pipeline stages for easier timing closure
 
 	parameter  O_BITS = $O_BITS$
 )(
 	// Global Control
-	(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF s_axilite:s_axis:m_axis, ASSOCIATED_RESET ap_rst_n" *)
+	(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF s_axilite:in0_V:out_V, ASSOCIATED_RESET ap_rst_n" *)
 	(* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *)
 	input	ap_clk,
 	(* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *)
@@ -83,17 +84,17 @@ module thresholding_axi_tpl_outer #(
 	output [ 1:0]  s_axilite_RRESP,
 
 	//- AXI Stream - Input --------------
-	output  s_axis_tready,
-	input   s_axis_tvalid,
-	input [((PE*K+7)/8)*8-1:0]  s_axis_tdata,
+	output  in0_V_tready,
+	input   in0_V_tvalid,
+	input [((PE*K+7)/8)*8-1:0]  in0_V_tdata,
 
 	//- AXI Stream - Output -------------
-	input   m_axis_tready,
-	output  m_axis_tvalid,
-	output [((PE*O_BITS+7)/8)*8-1:0]  m_axis_tdata
+	input   out_V_tready,
+	output  out_V_tvalid,
+	output [((PE*O_BITS+7)/8)*8-1:0]  out_V_tdata
 );
 
-	thresholding_axi_tpl_inner #(
+	thresholding_axi #(
 		.N(N), .K(K), .C(C), .PE(PE),
 		.SIGNED(SIGNED),
 		.FPARG(FPARG),
@@ -101,7 +102,8 @@ module thresholding_axi_tpl_outer #(
 		.THRESHOLDS_PATH(THRESHOLDS_PATH),
 		.USE_AXILITE(USE_AXILITE),
 		.DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM),
-		.DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM)
+		.DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM),
+		.DEEP_PIPELINE(DEEP_PIPELINE)
 	) core (
 		.ap_clk(ap_clk), .ap_rst_n(ap_rst_n),
 
@@ -111,8 +113,8 @@ module thresholding_axi_tpl_outer #(
 
 		.s_axilite_ARVALID(s_axilite_ARVALID), .s_axilite_ARREADY(s_axilite_ARREADY), .s_axilite_ARADDR(s_axilite_ARADDR),
 		.s_axilite_RVALID(s_axilite_RVALID), .s_axilite_RREADY(s_axilite_RREADY), .s_axilite_RDATA(s_axilite_RDATA), .s_axilite_RRESP(s_axilite_RRESP),
-		.s_axis_tready(s_axis_tready), .s_axis_tvalid(s_axis_tvalid), .s_axis_tdata(s_axis_tdata),
-		.m_axis_tready(m_axis_tready), .m_axis_tvalid(m_axis_tvalid), .m_axis_tdata(m_axis_tdata)
+		.s_axis_tready(in0_V_tready), .s_axis_tvalid(in0_V_tvalid), .s_axis_tdata(in0_V_tdata),
+		.m_axis_tready(out_V_tready), .m_axis_tvalid(out_V_tvalid), .m_axis_tdata(out_V_tdata)
 	);
 
-endmodule // thresholding_axi_tpl_outer
+endmodule // thresholding_template_wrapper