diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
index fdcf44c6d9..3627855cfb 100644
--- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
@@ -203,6 +203,14 @@ finn.custom\_op.fpgadataflow.thresholding\_batch
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.fpgadataflow.thresholding\_binary\_search
+-----------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.thresholding_binary_search
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 
 finn.custom\_op.fpgadataflow.tlastmarker
 -----------------------------------------------
diff --git a/finn-rtllib/thresholding/component.xml b/finn-rtllib/thresholding/component.xml
new file mode 100644
index 0000000000..e28a3a2c2d
--- /dev/null
+++ b/finn-rtllib/thresholding/component.xml
@@ -0,0 +1,1002 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<spirit:component xmlns:xilinx="http://www.xilinx.com" xmlns:spirit="http://www.spiritconsortium.org/XMLSchema/SPIRIT/1685-2009" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <spirit:vendor>amd.com</spirit:vendor>
+  <spirit:library>finn</spirit:library>
+  <spirit:name>thresholding_axi</spirit:name>
+  <spirit:version>1.0</spirit:version>
+  <spirit:busInterfaces>
+    <spirit:busInterface>
+      <spirit:name>ap_clk</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock_rtl" spirit:version="1.0"/>
+      <spirit:slave/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>CLK</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>ap_clk</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+      <spirit:parameters>
+        <spirit:parameter>
+          <spirit:name>ASSOCIATED_RESET</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_RESET">ap_rst_n</spirit:value>
+        </spirit:parameter>
+        <spirit:parameter>
+          <spirit:name>ASSOCIATED_BUSIF</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_BUSIF">s_axilite:s_axis:m_axis</spirit:value>
+        </spirit:parameter>
+        <spirit:parameter>
+          <spirit:name>FREQ_TOLERANCE_HZ</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.FREQ_TOLERANCE_HZ">-1</spirit:value>
+        </spirit:parameter>
+      </spirit:parameters>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>m_axis</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis_rtl" spirit:version="1.0"/>
+      <spirit:master/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>m_axis_tdata</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>m_axis_tvalid</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>m_axis_tready</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>s_axis</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="axis_rtl" spirit:version="1.0"/>
+      <spirit:slave/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axis_tdata</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axis_tvalid</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>TREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axis_tready</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>s_axilite</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm_rtl" spirit:version="1.0"/>
+      <spirit:slave>
+        <spirit:memoryMapRef spirit:memoryMapRef="s_axilite"/>
+      </spirit:slave>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>AWADDR</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_AWADDR</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>AWVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_AWVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>AWREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_AWREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WDATA</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WSTRB</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WSTRB</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>WREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_WREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>BRESP</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_BRESP</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>BVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_BVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>BREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_BREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>ARADDR</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_ARADDR</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>ARVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_ARVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>ARREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_ARREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RDATA</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RDATA</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RRESP</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RRESP</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RVALID</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RVALID</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RREADY</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>s_axilite_RREADY</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+    </spirit:busInterface>
+    <spirit:busInterface>
+      <spirit:name>ap_rst_n</spirit:name>
+      <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset" spirit:version="1.0"/>
+      <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset_rtl" spirit:version="1.0"/>
+      <spirit:slave/>
+      <spirit:portMaps>
+        <spirit:portMap>
+          <spirit:logicalPort>
+            <spirit:name>RST</spirit:name>
+          </spirit:logicalPort>
+          <spirit:physicalPort>
+            <spirit:name>ap_rst_n</spirit:name>
+          </spirit:physicalPort>
+        </spirit:portMap>
+      </spirit:portMaps>
+      <spirit:parameters>
+        <spirit:parameter>
+          <spirit:name>POLARITY</spirit:name>
+          <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_RST_N.POLARITY" spirit:choiceRef="choice_list_9d8b0d81">ACTIVE_LOW</spirit:value>
+        </spirit:parameter>
+      </spirit:parameters>
+    </spirit:busInterface>
+  </spirit:busInterfaces>
+  <spirit:memoryMaps>
+    <spirit:memoryMap>
+      <spirit:name>s_axilite</spirit:name>
+      <spirit:displayName>s_axilite</spirit:displayName>
+      <spirit:addressBlock>
+        <spirit:name>reg0</spirit:name>
+        <spirit:displayName>reg0</spirit:displayName>
+        <spirit:baseAddress spirit:format="bitString" spirit:bitStringLength="1">0x0</spirit:baseAddress>
+        <spirit:range spirit:format="long" spirit:resolve="dependent" spirit:dependency="pow(2,(spirit:decode(id(&apos;MODELPARAM_VALUE.ADDR_BITS&apos;)) - 1) - 0 + 1)" spirit:minimum="4096" spirit:rangeType="long">4096</spirit:range>
+        <spirit:width spirit:format="long">32</spirit:width>
+        <spirit:usage>register</spirit:usage>
+      </spirit:addressBlock>
+    </spirit:memoryMap>
+  </spirit:memoryMaps>
+  <spirit:model>
+    <spirit:views>
+      <spirit:view>
+        <spirit:name>xilinx_anylanguagesynthesis</spirit:name>
+        <spirit:displayName>Synthesis</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:synthesis</spirit:envIdentifier>
+        <spirit:language>Verilog</spirit:language>
+        <spirit:modelName>thresholding_axi_wrapper</spirit:modelName>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_anylanguagesynthesis_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>fd0bd85b</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+      <spirit:view>
+        <spirit:name>xilinx_anylanguagebehavioralsimulation</spirit:name>
+        <spirit:displayName>Simulation</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:simulation</spirit:envIdentifier>
+        <spirit:language>Verilog</spirit:language>
+        <spirit:modelName>thresholding_axi_wrapper</spirit:modelName>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_anylanguagebehavioralsimulation_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>fd0bd85b</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+      <spirit:view>
+        <spirit:name>xilinx_xpgui</spirit:name>
+        <spirit:displayName>UI Layout</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:xgui.ui</spirit:envIdentifier>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_xpgui_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>fc6b9b63</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+      <spirit:view>
+        <spirit:name>xilinx_utilityxitfiles</spirit:name>
+        <spirit:displayName>Utility XIT/TTCL</spirit:displayName>
+        <spirit:envIdentifier>:vivado.xilinx.com:xit.util</spirit:envIdentifier>
+        <spirit:fileSetRef>
+          <spirit:localName>xilinx_utilityxitfiles_view_fileset</spirit:localName>
+        </spirit:fileSetRef>
+        <spirit:parameters>
+          <spirit:parameter>
+            <spirit:name>viewChecksum</spirit:name>
+            <spirit:value>8b0215cd</spirit:value>
+          </spirit:parameter>
+        </spirit:parameters>
+      </spirit:view>
+    </spirit:views>
+    <spirit:ports>
+      <spirit:port>
+        <spirit:name>ap_clk</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>ap_rst_n</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_AWVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_AWREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_AWADDR</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="(spirit:decode(id(&apos;MODELPARAM_VALUE.ADDR_BITS&apos;)) - 1)">5</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WDATA</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">31</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_WSTRB</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">3</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">1</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_BVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_BREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_BRESP</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">1</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_ARVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_ARREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_ARADDR</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="(spirit:decode(id(&apos;MODELPARAM_VALUE.ADDR_BITS&apos;)) - 1)">5</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RVALID</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RREADY</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RDATA</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">31</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axilite_RRESP</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long">1</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axis_tready</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axis_tvalid</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>s_axis_tdata</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="(((((spirit:decode(id(&apos;MODELPARAM_VALUE.PE&apos;)) * spirit:decode(id(&apos;MODELPARAM_VALUE.K&apos;))) + 7) / 8) * 8) - 1)">15</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">0</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>m_axis_tready</spirit:name>
+        <spirit:wire>
+          <spirit:direction>in</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+          <spirit:driver>
+            <spirit:defaultValue spirit:format="long">1</spirit:defaultValue>
+          </spirit:driver>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>m_axis_tvalid</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+      <spirit:port>
+        <spirit:name>m_axis_tdata</spirit:name>
+        <spirit:wire>
+          <spirit:direction>out</spirit:direction>
+          <spirit:vector>
+            <spirit:left spirit:format="long" spirit:resolve="dependent" spirit:dependency="(((((spirit:decode(id(&apos;MODELPARAM_VALUE.PE&apos;)) * spirit:decode(id(&apos;MODELPARAM_VALUE.O_BITS&apos;))) + 7) / 8) * 8) - 1)">7</spirit:left>
+            <spirit:right spirit:format="long">0</spirit:right>
+          </spirit:vector>
+          <spirit:wireTypeDefs>
+            <spirit:wireTypeDef>
+              <spirit:typeName>std_logic_vector</spirit:typeName>
+              <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef>
+              <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef>
+            </spirit:wireTypeDef>
+          </spirit:wireTypeDefs>
+        </spirit:wire>
+      </spirit:port>
+    </spirit:ports>
+    <spirit:modelParameters>
+      <spirit:modelParameter xsi:type="spirit:nameValueTypeType" spirit:dataType="integer">
+        <spirit:name>N</spirit:name>
+        <spirit:displayName>N</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.N">4</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>K</spirit:name>
+        <spirit:displayName>K</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.K">16</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>C</spirit:name>
+        <spirit:displayName>C</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.C">1</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>PE</spirit:name>
+        <spirit:displayName>Pe</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.PE">1</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>SIGNED</spirit:name>
+        <spirit:displayName>Signed</spirit:displayName>
+        <spirit:value spirit:format="bool" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.SIGNED">true</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>FPARG</spirit:name>
+        <spirit:displayName>Fparg</spirit:displayName>
+        <spirit:value spirit:format="bool" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.FPARG">false</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>BIAS</spirit:name>
+        <spirit:displayName>Bias</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.BIAS">0</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>CF</spirit:name>
+        <spirit:displayName>Cf</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.CF" spirit:dependency="(spirit:decode(id(&apos;MODELPARAM_VALUE.C&apos;)) / spirit:decode(id(&apos;MODELPARAM_VALUE.PE&apos;)))">1</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer">
+        <spirit:name>ADDR_BITS</spirit:name>
+        <spirit:displayName>Addr Bits</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.ADDR_BITS" spirit:dependency="(((spirit:ceil(spirit:log(2,spirit:decode(id(&apos;MODELPARAM_VALUE.CF&apos;)))) + spirit:ceil(spirit:log(2,spirit:decode(id(&apos;MODELPARAM_VALUE.PE&apos;))))) + spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;))) + 2)">6</spirit:value>
+      </spirit:modelParameter>
+      <spirit:modelParameter spirit:dataType="integer"> <!-- NOTE(review): the dependency below only evaluates ceil(log2(2^N + BIAS)), i.e. the BIAS >= 0 case of the O_BITS formula in gui/thresholding_axi_v1_0.gtcl; confirm how a negative BIAS is resolved -->
+        <spirit:name>O_BITS</spirit:name>
+        <spirit:displayName>O Bits</spirit:displayName>
+        <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.O_BITS" spirit:dependency="spirit:ceil(spirit:log(2,((2 ** spirit:decode(id(&apos;MODELPARAM_VALUE.N&apos;))) + spirit:decode(id(&apos;MODELPARAM_VALUE.BIAS&apos;)))))">4</spirit:value>
+      </spirit:modelParameter>
+    </spirit:modelParameters>
+  </spirit:model>
+  <spirit:choices>
+    <spirit:choice>
+      <spirit:name>choice_list_9d8b0d81</spirit:name>
+      <spirit:enumeration>ACTIVE_HIGH</spirit:enumeration>
+      <spirit:enumeration>ACTIVE_LOW</spirit:enumeration>
+    </spirit:choice>
+  </spirit:choices>
+  <spirit:fileSets>
+    <spirit:fileSet>
+      <spirit:name>xilinx_anylanguagesynthesis_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>hdl/thresholding.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi_wrapper.v</spirit:name>
+        <spirit:fileType>verilogSource</spirit:fileType>
+        <spirit:userFileType>CHECKSUM_7b8c102d</spirit:userFileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/axilite_if.v</spirit:name>
+        <spirit:fileType>verilogSource</spirit:fileType>
+        <spirit:userFileType>CHECKSUM_69d1ba26</spirit:userFileType>
+        <spirit:logicalName>xil_defaultlib</spirit:logicalName>
+      </spirit:file>
+    </spirit:fileSet>
+    <spirit:fileSet>
+      <spirit:name>xilinx_anylanguagebehavioralsimulation_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>hdl/thresholding.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi.sv</spirit:name>
+        <spirit:fileType>systemVerilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/thresholding_axi_wrapper.v</spirit:name>
+        <spirit:fileType>verilogSource</spirit:fileType>
+      </spirit:file>
+      <spirit:file>
+        <spirit:name>hdl/axilite_if.v</spirit:name>
+        <spirit:fileType>verilogSource</spirit:fileType>
+        <spirit:userFileType>USED_IN_ipstatic</spirit:userFileType>
+        <spirit:logicalName>xil_defaultlib</spirit:logicalName>
+      </spirit:file>
+    </spirit:fileSet>
+    <spirit:fileSet>
+      <spirit:name>xilinx_xpgui_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>xgui/thresholding_axi_v1_0.tcl</spirit:name>
+        <spirit:fileType>tclSource</spirit:fileType>
+        <spirit:userFileType>CHECKSUM_fc6b9b63</spirit:userFileType>
+        <spirit:userFileType>XGUI_VERSION_2</spirit:userFileType>
+      </spirit:file>
+    </spirit:fileSet>
+    <spirit:fileSet>
+      <spirit:name>xilinx_utilityxitfiles_view_fileset</spirit:name>
+      <spirit:file>
+        <spirit:name>gui/thresholding_axi_v1_0.gtcl</spirit:name>
+        <spirit:userFileType>GTCL</spirit:userFileType>
+      </spirit:file>
+    </spirit:fileSet>
+  </spirit:fileSets>
+  <spirit:description>MultiThreshold</spirit:description>
+  <spirit:parameters>
+    <spirit:parameter>
+      <spirit:name>N</spirit:name>
+      <spirit:displayName>Output Precision</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.N">4</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>K</spirit:name>
+      <spirit:displayName>Input Precision</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.K">16</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>C</spirit:name>
+      <spirit:displayName>Channels</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.C">1</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>PE</spirit:name>
+      <spirit:displayName>Pe</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.PE">1</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>SIGNED</spirit:name>
+      <spirit:displayName>Signed Inputs</spirit:displayName>
+      <spirit:value spirit:format="bool" spirit:resolve="user" spirit:id="PARAM_VALUE.SIGNED">true</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>FPARG</spirit:name>
+      <spirit:displayName>Floating-Point Inputs</spirit:displayName>
+      <spirit:value spirit:format="bool" spirit:resolve="user" spirit:id="PARAM_VALUE.FPARG">false</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>BIAS</spirit:name>
+      <spirit:displayName>Bias</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.BIAS">0</spirit:value>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>CF</spirit:name>
+      <spirit:displayName>Channel Fold</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.CF">1</spirit:value>
+      <spirit:vendorExtensions>
+        <xilinx:parameterInfo>
+          <xilinx:enablement>
+            <xilinx:isEnabled xilinx:id="PARAM_ENABLEMENT.CF">false</xilinx:isEnabled>
+          </xilinx:enablement>
+        </xilinx:parameterInfo>
+      </spirit:vendorExtensions>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>ADDR_BITS</spirit:name>
+      <spirit:displayName>Address Bits</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.ADDR_BITS">6</spirit:value>
+      <spirit:vendorExtensions>
+        <xilinx:parameterInfo>
+          <xilinx:enablement>
+            <xilinx:isEnabled xilinx:id="PARAM_ENABLEMENT.ADDR_BITS">false</xilinx:isEnabled>
+          </xilinx:enablement>
+        </xilinx:parameterInfo>
+      </spirit:vendorExtensions>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>O_BITS</spirit:name>
+      <spirit:displayName>Output Value Width</spirit:displayName>
+      <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.O_BITS">4</spirit:value>
+      <spirit:vendorExtensions>
+        <xilinx:parameterInfo>
+          <xilinx:enablement>
+            <xilinx:isEnabled xilinx:id="PARAM_ENABLEMENT.O_BITS">false</xilinx:isEnabled>
+          </xilinx:enablement>
+        </xilinx:parameterInfo>
+      </spirit:vendorExtensions>
+    </spirit:parameter>
+    <spirit:parameter>
+      <spirit:name>Component_Name</spirit:name>
+      <spirit:value spirit:resolve="user" spirit:id="PARAM_VALUE.Component_Name" spirit:order="1">thresholding_axi_wrapper_v1_0</spirit:value>
+    </spirit:parameter>
+  </spirit:parameters>
+  <spirit:vendorExtensions>
+    <xilinx:coreExtensions>
+      <xilinx:supportedFamilies>
+        <xilinx:family xilinx:lifeCycle="Production">virtex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qvirtex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">versal</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintex7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qkintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qkintex7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">akintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">artix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">artix7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">aartix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qartix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">zynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qzynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">azynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">spartan7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">aspartan7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexu</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">zynquplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplusHBM</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplus58g</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintexuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">artixuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintexu</xilinx:family>
+      </xilinx:supportedFamilies>
+      <xilinx:taxonomies>
+        <xilinx:taxonomy>/UserIP</xilinx:taxonomy>
+      </xilinx:taxonomies>
+      <xilinx:displayName>thresholding_axi</xilinx:displayName>
+      <xilinx:autoFamilySupportLevel>level_1</xilinx:autoFamilySupportLevel>
+      <xilinx:definitionSource>package_project</xilinx:definitionSource>
+      <xilinx:coreRevision>2</xilinx:coreRevision>
+      <xilinx:upgrades>
+        <xilinx:canUpgradeFrom>user.org:user:thresholding_axi_wrapper:1.0</xilinx:canUpgradeFrom>
+      </xilinx:upgrades>
+      <xilinx:coreCreationDateTime>2023-06-27T05:47:20Z</xilinx:coreCreationDateTime>
+      <xilinx:tags>
+        <xilinx:tag xilinx:name="nopcore"/>
+      </xilinx:tags>
+    </xilinx:coreExtensions>
+    <xilinx:packagingInfo>
+      <xilinx:xilinxVersion>2022.2</xilinx:xilinxVersion>
+      <xilinx:checksum xilinx:scope="busInterfaces" xilinx:value="caf1c8b2"/>
+      <xilinx:checksum xilinx:scope="memoryMaps" xilinx:value="5b88f249"/>
+      <xilinx:checksum xilinx:scope="fileGroups" xilinx:value="5b2de4fb"/>
+      <xilinx:checksum xilinx:scope="ports" xilinx:value="8d01c0f6"/>
+      <xilinx:checksum xilinx:scope="hdlParameters" xilinx:value="b7d69776"/>
+      <xilinx:checksum xilinx:scope="parameters" xilinx:value="90d363ee"/>
+      <xilinx:targetDRCs>
+        <xilinx:targetDRC xilinx:tool="ipi">
+          <xilinx:targetDRCOption xilinx:name="ignore_freq_hz" xilinx:value="true"/>
+        </xilinx:targetDRC>
+      </xilinx:targetDRCs>
+    </xilinx:packagingInfo>
+  </spirit:vendorExtensions>
+</spirit:component>
diff --git a/finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl b/finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl
new file mode 100644
index 0000000000..90d73ede7e
--- /dev/null
+++ b/finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl
@@ -0,0 +1,4 @@
+# This file is automatically written.  Do not modify.
+proc gen_USERPARAMETER_CF_VALUE {C PE } {expr $C/$PE} ;# channel fold: quotient C/PE (integer division when both arguments are integers)
+proc gen_USERPARAMETER_ADDR_BITS_VALUE {C PE N } {expr int(ceil(log($C/$PE)/log(2))+ceil(log($PE)/log(2))+$N+2)} ;# ceil(log2(C/PE)) + ceil(log2(PE)) + N + 2
+proc gen_USERPARAMETER_O_BITS_VALUE {BIAS N } {expr int(ceil($BIAS >= 0? log(pow(2,$N)+$BIAS)/log(2) : 1+log(-$BIAS >= pow(2,$N-1)? -$BIAS : pow(2,$N)+$BIAS)/log(2)))} ;# BIAS >= 0: ceil(log2(2^N+BIAS)); BIAS < 0: 1 + ceil(log2(m)) with m = -BIAS if -BIAS >= 2^(N-1), else 2^N+BIAS
diff --git a/finn-rtllib/thresholding/hdl/axilite_if.v b/finn-rtllib/thresholding/hdl/axilite_if.v
new file mode 100644
index 0000000000..bdd4de288e
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/axilite_if.v
@@ -0,0 +1,210 @@
+/*
+ Copyright (c) 2020, Xilinx
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name of FINN nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+module axi4lite_if //AXI4-Lite slave front end: serves one read or one write at a time and forwards it to the IP-side port
+#(
+    parameter ADDR_WIDTH = 32,//width of awaddr/araddr and of ip_addr
+    parameter DATA_WIDTH = 32,//AXI4 spec requires this to be strictly 32 or 64
+    parameter IP_DATA_WIDTH = 64//can be any power-of-2 multiple of DATA_WIDTH
+)
+(
+//system signals
+input aclk,
+input aresetn,//active low, asynchronous assertion and synchronous deassertion
+
+//Write channels
+//write address
+output reg                  awready,
+input                       awvalid,
+input [ADDR_WIDTH-1:0]      awaddr,
+input [2:0]                 awprot,//not used by this module
+//write data
+output reg                  wready,
+input                       wvalid,
+input [DATA_WIDTH-1:0]      wdata,
+input [(DATA_WIDTH/8)-1:0]  wstrb,//not used by this module: complete data words are always written
+//burst response
+input                       bready,
+output reg                  bvalid,
+output reg [1:0]            bresp,//NOTE: 00 = OKAY, 10 = SLVERR (write error)
+
+//Read channels
+//read address
+output reg                  arready,
+input                       arvalid,
+input [ADDR_WIDTH-1:0]      araddr,
+input [2:0]                 arprot,//not used by this module
+//read data
+input                       rready,
+output reg                  rvalid,
+output reg [1:0]            rresp,//NOTE: 00 = OKAY, 10 = SLVERR (read error)
+output reg [DATA_WIDTH-1:0] rdata,
+
+//IP-side interface
+output reg                  ip_en,//one-cycle strobe: accepted read request or completed IP-wide write
+output reg                  ip_wen,//one-cycle strobe: ip_wdata holds a complete IP-wide word to be written
+output reg [ADDR_WIDTH-1:0] ip_addr,//address in units of (2**NFOLDS_LOG) AXI data words, updated together with ip_en
+output [IP_DATA_WIDTH-1:0]  ip_wdata,
+input                       ip_rack,//read acknowledge: ip_rdata is sampled into rdata while this is high
+input [IP_DATA_WIDTH-1:0]      ip_rdata
+);
+
+localparam RESP_OKAY = 2'b00;
+localparam RESP_SLVERR = 2'b10;
+//get ceil(log2(ceil(IP_DATA_WIDTH/DATA_WIDTH)))
+localparam NFOLDS_LOG = $clog2((IP_DATA_WIDTH + DATA_WIDTH - 1) / DATA_WIDTH);
+
+reg                      internal_ren;
+reg                      internal_wen;
+reg                      internal_wack;
+reg [ADDR_WIDTH-1:0]     internal_raddr;
+reg [ADDR_WIDTH-1:0]     internal_waddr;
+reg [DATA_WIDTH-1:0]     internal_wdata;
+wire [DATA_WIDTH-1:0]    internal_rdata;
+reg                      internal_error = 0;//never driven in this module, so bresp/rresp are always OKAY
+
+//check DATA_WIDTH
+initial begin
+    if(DATA_WIDTH != 32 & DATA_WIDTH != 64) begin
+        $display("AXI4Lite DATA_WIDTH must be 32 or 64");
+        $finish;
+    end
+end
+
+//transaction state machine
+localparam  STATE_IDLE  = 0,
+            STATE_READ  = 1,
+            STATE_WRITE = 2;
+
+reg [1:0] state;
+
+always @(posedge aclk or negedge aresetn)
+    if(~aresetn)
+        state <= STATE_IDLE;
+    else case(state)
+        STATE_IDLE:
+            if(awvalid & wvalid)//a write needs address and data together and wins over a pending read
+                state <= STATE_WRITE;
+            else if(arvalid)
+                state <= STATE_READ;
+        STATE_READ:
+            if(rvalid & rready)
+                state <= STATE_IDLE;
+        STATE_WRITE:
+            if(bvalid & bready)
+                state <= STATE_IDLE;
+        default: state <= STATE_IDLE;
+    endcase
+
+//write-related internal signals
+always @(*) begin
+    internal_waddr = awaddr >> $clog2(DATA_WIDTH/8);//byte address -> AXI data word address
+    internal_wdata = wdata;
+    internal_wen = (state == STATE_IDLE) & awvalid & wvalid;//high for the single cycle in which a write is accepted
+end
+
+always @(posedge aclk) begin
+    awready <= internal_wen;//ready is raised in the cycle after acceptance
+    wready <= internal_wen;
+end
+
+//read-related internal signals
+always @(*) begin
+    internal_raddr = araddr >> $clog2(DATA_WIDTH/8);//byte address -> AXI data word address
+    internal_ren = (state == STATE_IDLE) & ~internal_wen & arvalid;
+end
+
+always @(posedge aclk)
+    arready <= internal_ren;
+
+wire write_to_last_fold;
+
+always @(posedge aclk) begin
+    ip_wen <= write_to_last_fold;
+    ip_en <= internal_ren | write_to_last_fold;
+    if(internal_ren | write_to_last_fold)
+        ip_addr <= internal_ren ? (internal_raddr >> NFOLDS_LOG) : (internal_waddr >> NFOLDS_LOG);
+    internal_wack <= internal_wen;
+end
+//folding: an IP word wider than DATA_WIDTH is assembled from consecutive AXI words and handed over on the last one
+genvar i;
+reg [(1<<NFOLDS_LOG)*DATA_WIDTH-1:0] ip_wdata_wide;//write staging register, one DATA_WIDTH slice per fold
+generate
+if(NFOLDS_LOG == 0) begin: no_fold
+    assign write_to_last_fold = internal_wen;
+    assign internal_rdata = ip_rdata;
+    always @(posedge aclk)
+        ip_wdata_wide <= internal_wdata;//free-running is fine here: ip_wen is registered on the same edge
+end else begin: fold
+    reg [NFOLDS_LOG-1:0] internal_rfold;
+    assign write_to_last_fold = internal_wen & (internal_waddr[NFOLDS_LOG-1:0] == {(NFOLDS_LOG){1'b1}});
+    assign internal_rdata = ip_rdata >> (internal_rfold*DATA_WIDTH);//select the slice addressed by the read
+    always @(posedge aclk)
+        if(internal_ren)
+            internal_rfold <= internal_raddr[NFOLDS_LOG-1:0];
+    for(i=0; i<(1<<NFOLDS_LOG); i = i+1) begin: gen_wdata
+        always @(posedge aclk)
+            if(internal_wen & (internal_waddr[NFOLDS_LOG-1:0] == i))//only an accepted write may update a slice; awaddr/wdata are don't-care otherwise
+                ip_wdata_wide[(i+1)*DATA_WIDTH-1:i*DATA_WIDTH] <= internal_wdata;
+    end
+end
+endgenerate
+assign ip_wdata = ip_wdata_wide[IP_DATA_WIDTH-1:0];
+
+//write response on AXI4L bus
+always @(posedge aclk or negedge aresetn)
+    if(~aresetn) begin
+        bvalid <= 0;//AXI4 spec requires BVALID pulled LOW during reset
+        bresp <= RESP_OKAY;
+    end else if(internal_wack) begin
+        bvalid <= 1;
+        bresp <= internal_error ? RESP_SLVERR : RESP_OKAY;
+    end else if(bready) begin
+        bvalid <= 0;
+        bresp <= RESP_OKAY;
+    end
+
+//read response on AXI4L bus
+always @(posedge aclk or negedge aresetn)
+    if(~aresetn) begin
+        rvalid <= 0;//AXI4 spec requires RVALID pulled LOW during reset
+        rdata <= 0;
+        rresp <= RESP_OKAY;
+    end else if(ip_rack) begin
+        rvalid <= 1;
+        rdata <= internal_rdata;
+        rresp <= internal_error ? RESP_SLVERR : RESP_OKAY;
+    end else if(rready) begin
+        rvalid <= 0;
+        rdata <= 0;
+        rresp <= RESP_OKAY;
+    end
+
+endmodule
diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv
new file mode 100644
index 0000000000..75fbb61a4d
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/thresholding.sv
@@ -0,0 +1,358 @@
+/******************************************************************************
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the copyright holder nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @brief	Pipelined thresholding by binary search.
+ * @author	Thomas B. Preußer <thomas.preusser@amd.com>
+ *
+ * @description
+ *  Produces the N-bit count of those among 2^N-1 thresholds that are not
+ *  larger than the corresponding input:
+ *     y = Σ(T_i <= x)
+ *  The result is computed by binary search. The runtime-configurable
+ *  thresholds must be written in ascending order:
+ *     i < j => T_i < T_j
+ *  The design supports channel folding allowing each input to be processed
+ *  with respect to a selectable set of thresholds. The corresponding
+ *  threshold configuration relies on a channel address prefix. Inputs are
+ *  accompanied by a channel selector.
+ *
+ *  Parameter Layout as seen on AXI-Lite (row by row):
+ *            | Base                  \    Offs  |   0    1    2  ...   2^N-2   2^N-1
+ *   ---------+----------------------------------+------------------------------------
+ *    Chnl #0 |   0                              |  T_0  T_1  T_2 ... T_{2^N-2}  'x
+ *    Chnl #1 |   2^N                            |  T_0  T_1  T_2 ... T_{2^N-2}  'x
+ *    Chnl #c | ((c/PE)*2^$clog2(PE) + c%PE)*2^N |  T_0  T_1  T_2 ... T_{2^N-2}  'x
+ *
+ *****************************************************************************/
+module thresholding #(
+	int unsigned  N,  // output precision
+	int unsigned  K,  // input/threshold precision
+	int unsigned  C,  // number of channels
+	int unsigned  PE, // parallel processing elements
+
+	bit  SIGNED = 1,  // signed inputs
+	bit  FPARG  = 0,  // floating-point inputs: [sign] | exponent | mantissa
+	int  BIAS   = 0,  // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
+
+	// Initial Thresholds
+	parameter  THRESHOLDS_PATH = "",
+	bit  USE_CONFIG = 1,
+
+	// Force Use of On-Chip Memory Blocks
+	int unsigned  DEPTH_TRIGGER_URAM = 0,	// if non-zero, local mems of this depth or more go into URAM (prio)
+	int unsigned  DEPTH_TRIGGER_BRAM = 0,	// if non-zero, local mems of this depth or more go into BRAM
+	bit  DEEP_PIPELINE = 0,
+
+	localparam int unsigned  CF = C/PE,  // Channel fold
+	localparam int unsigned  O_BITS = BIAS >= 0?
+		/* unsigned */ $clog2(2**N+BIAS) :
+		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
+)(
+	// Global Control
+	input	logic  clk,
+	input	logic  rst,
+
+	// Threshold Configuration
+	input	logic  cfg_en,
+	input	logic  cfg_we,
+	input	logic [$clog2(CF)+$clog2(PE)+N-1:0]  cfg_a,
+	input	logic [K-1:0]  cfg_d,
+	output	logic  cfg_rack,
+	output	logic [K-1:0]  cfg_q,
+
+	// Input Stream
+	output	logic  irdy,
+	input	logic  ivld,
+	input	logic [PE-1:0][K-1:0]  idat,
+
+	// Output Stream
+	input	logic  ordy,
+	output	logic  ovld,
+	output	logic [PE-1:0][O_BITS-1:0]  odat
+);
+
+	// Parameter Constraints Checking: the channels must divide evenly across the PE lanes.
+	initial begin
+		if(CF*PE != C) begin	// CF = C/PE truncates, so a mismatch means C is not a multiple of PE
+			$error("Channel count C=%0d is not a multiple of parallelism PE=%0d.", C, PE);
+			$finish;
+		end
+	end
+
+	// Operations within Pipeline
+	typedef enum logic [1:0] {
+		NOP = 2'b00, // No operation
+		TH  = 2'b01, // Thresholding
+		WR  = 2'b11, // Write (initialization)
+		RB  = 2'b10, // Readback (validation)
+		CFG = 2'b1x  // Config op (pointer-preserving)
+	} op_e;
+
+	// Pipeline Link Type
+	typedef logic [$clog2(CF)+N-1:0]  ptr_t;
+	typedef logic [K           -1:0]  val_t;
+	typedef struct packed {
+		op_e   op;
+		ptr_t  ptr;	// WR/RB: address;         TH: result
+		val_t  val;	// WR/RB: threshold value; TH: input value
+	} pipe_t;
+
+	//-----------------------------------------------------------------------
+	// Pipeline Feed
+	//	- configuration always takes precedence
+	//	- number of pending thresholding ops capped to MAX_PENDING = (DEEP_PIPELINE+1)*N + 3
+	//	  across pipeline and output FIFO: pipe:(DEEP_PIPELINE+1)*N + A:1 + B:1 + 1
+	localparam int unsigned  MAX_PENDING = (DEEP_PIPELINE+1)*N + 3;
+	pipe_t  pipe[PE][N+1];
+	if(1) begin : blkFeed
+
+		// Thresholding Input Guard ensuring Output FIFO is never overrun
+		logic signed [$clog2(MAX_PENDING):0]  GuardSem = MAX_PENDING-1;	// MAX_PENDING-1, ..., 0, -1
+		uwire  th_full = GuardSem[$left(GuardSem)];
+		always_ff @(posedge clk) begin
+			if(rst)  GuardSem <= MAX_PENDING-1;
+			else begin
+				automatic logic  dec = !(USE_CONFIG && cfg_en) && !th_full && ivld;
+				automatic logic  inc = ovld && ordy;
+				GuardSem <= GuardSem + (inc == dec? 0 : inc? 1 : -1);
+			end
+		end
+
+		// PE Configuration Address Decoding
+		uwire  cfg_sel[PE];
+		if(PE == 1)  assign  cfg_sel[0] = 1;
+		else begin
+			for(genvar  pe = 0; pe < PE; pe++) begin
+				assign	cfg_sel[pe] = USE_CONFIG && cfg_en && (cfg_a[N+:$clog2(PE)] == pe);
+			end
+		end
+
+		uwire ptr_t  iptr;
+		assign	iptr[0+:N] = cfg_a[0+:N];
+		if(CF > 1) begin
+			// Channel Fold Rotation
+			logic [$clog2(CF)-1:0]  CnlCnt = 0;
+			logic                   CnlLst = 0;
+			always_ff @(posedge clk) begin
+				if(rst) begin
+					CnlCnt <= 0;
+					CnlLst <= 0;
+				end
+				else if(!(USE_CONFIG && cfg_en) && !th_full && ivld) begin
+					CnlCnt <= CnlCnt + (CnlLst? 1-CF : 1);
+					CnlLst <= CnlCnt == CF-2;
+				end
+			end
+
+			assign  iptr[N+:$clog2(CF)] = USE_CONFIG && cfg_en? cfg_a[N+$clog2(PE)+:$clog2(CF)] : CnlCnt;
+		end
+
+		for(genvar  pe = 0; pe < PE; pe++) begin
+			assign	pipe[pe][0] = '{
+				op:  USE_CONFIG && cfg_en?
+					(!cfg_sel[pe]? NOP : cfg_we? WR : RB) :
+					(ivld && !th_full? TH : NOP),
+				ptr: iptr,
+				val: !(USE_CONFIG && cfg_en)? idat[pe] : cfg_we? cfg_d : 0
+			};
+		end
+
+		assign	irdy = !(USE_CONFIG && cfg_en) && !th_full;
+	end : blkFeed
+
+	//-----------------------------------------------------------------------
+	// Free-Running Thresholding Pipeline
+	for(genvar  stage = 0; stage < N; stage++) begin : genStages
+
+		localparam int unsigned  SN = N-1-stage;
+		for(genvar  pe = 0; pe < PE; pe++) begin : genPE
+			uwire pipe_t  p = pipe[pe][stage];
+			uwire  cs = (p.ptr[SN:0] == 2**SN-1);
+
+			// Threshold Memory
+			val_t  Thresh;	// Read-out register
+			if(1) begin : blkThresh
+				localparam int unsigned  DEPTH = CF * 2**stage;
+				localparam  RAM_STYLE =
+					DEPTH_TRIGGER_URAM && (DEPTH >= DEPTH_TRIGGER_URAM)? "ultra" :
+					DEPTH_TRIGGER_BRAM && (DEPTH >= DEPTH_TRIGGER_BRAM)? "block" :
+					// If BRAM trigger defined, force distributed memory below if Vivado may be tempted to use BRAM nonetheless.
+					DEPTH_TRIGGER_BRAM && (DEPTH >= 64)? "distributed" : "auto";
+
+				(* RAM_STYLE = RAM_STYLE *)
+				val_t  Threshs[DEPTH];
+				if(THRESHOLDS_PATH != "") begin
+					localparam  FILE = $sformatf("%s/threshs_%0d_%0d.dat", THRESHOLDS_PATH, pe, stage);
+					initial  $readmemh(FILE, Threshs);
+				end
+
+				if(USE_CONFIG) begin : genThreshMem
+					uwire  we = (p.op ==? WR) && cs;
+					if((CF == 1) && (stage == 0)) begin
+						always @(posedge clk) begin
+							if(we)  Threshs[0] <= p.val;
+						end
+					end
+					else begin
+						uwire [$clog2(CF)+stage-1:0]  addr = p.ptr[$clog2(CF)+N-1:SN+1];
+						always @(posedge clk) begin
+							if(we)  Threshs[addr] <= p.val;
+						end
+					end
+				end : genThreshMem
+
+				if((CF == 1) && (stage == 0)) begin
+					assign	Thresh = Threshs[0];
+				end
+				else begin
+					uwire [$clog2(CF)+stage-1:0]  addr = p.ptr[$clog2(CF)+N-1:SN+1];
+					always_ff @(posedge clk) begin
+						Thresh <= Threshs[addr];
+					end
+				end
+
+			end : blkThresh
+
+			// Pipeline State
+			pipe_t  P = '{ op: NOP, default: 'x };
+			logic   Reval = 0;
+			always_ff @(posedge clk) begin
+				if(rst) begin
+					P <= '{ op: NOP, default: 'x };
+					Reval <= 0;
+				end
+				else begin
+					P <= p;
+					Reval <= (p.op ==? RB) && cs;
+				end
+			end
+
+			logic  cmp;
+			if(!SIGNED)		assign	cmp = $unsigned(Thresh) <= $unsigned(P.val);
+			else if(!FPARG)	assign	cmp =   $signed(Thresh) <=   $signed(P.val);
+			else begin : blkSignedFloat
+				uwire  mag_eq = Thresh[K-2:0] == P.val[K-2:0];
+				uwire  mag_le = Thresh[K-2:0] <= P.val[K-2:0];
+				always_comb begin
+					unique case({Thresh[K-1], P.val[K-1]})
+					2'b00:  cmp = mag_le;
+					2'b01:  cmp = 0;
+					2'b10:  cmp = 1;
+					2'b11:  cmp = !mag_le || mag_eq;
+					default: cmp = 'x;
+					endcase
+				end
+			end : blkSignedFloat
+
+			// Pipeline State Update
+			pipe_t  pp;
+			always_comb begin
+				pp = P;
+				if(P.op !=? CFG)  pp.ptr[SN] = cmp;
+				if(Reval)         pp.val = Thresh;
+			end
+
+			// Pipeline State Forward (potentially additional register)
+			pipe_t  pf;
+			if(!DEEP_PIPELINE)  assign  pf = pp;
+			else begin
+				pipe_t  Pf = '{ op: NOP, default: 'x };
+				always_ff @(posedge clk) begin
+					if(rst)  Pf <= '{ op: NOP, default: 'x };
+					else     Pf <= pp;
+				end
+				assign	pf = Pf;
+			end
+
+			assign	pipe[pe][stage+1] = pf;
+
+		end : genPE
+	end : genStages
+
+	//-----------------------------------------------------------------------
+	// Configuration Readback
+	always_comb begin
+		cfg_rack = 0;
+		cfg_q = 0;
+		foreach(pipe[pe]) begin
+			automatic pipe_t  p = pipe[pe][N];
+			cfg_rack |= p.op ==? RB;
+			cfg_q    |= p.val;
+		end
+	end
+
+	//-----------------------------------------------------------------------
+	// Stream Output through FIFO
+	//	- Depth of N + Output Reg to allow pipe to drain entirely under backpressure
+	//	- Typically mapped to an SRL shift register
+	if(1) begin : blkStreamOutput
+		localparam int unsigned  A_DEPTH = MAX_PENDING - 1;
+		logic        [PE-1 : 0][N-1 : 0]  ADat[A_DEPTH];
+		logic signed [$clog2(A_DEPTH):0]  APtr = '1;	// -1, 0, 1, ..., A_DEPTH-1
+		uwire  avld = !APtr[$left(APtr)];
+
+		logic [PE-1:0][N-1:0]  BDat = 'x;
+		logic  BVld =  0;
+
+		uwire  aload = pipe[0][N].op ==? TH;
+		uwire  bload = !BVld || ordy;
+
+		always_ff @(posedge clk) begin
+			if(aload) begin
+				assert(APtr < $signed(A_DEPTH-1)) else begin
+					$error("Overrun after failing stream guard.");
+					$stop;
+				end
+				foreach(pipe[pe])  ADat[0][pe] <= pipe[pe][N].ptr;
+				for(int unsigned  i = 1; i < A_DEPTH; i++)  ADat[i] <= ADat[i-1];
+			end
+		end
+		always_ff @(posedge clk) begin
+			if(rst)  APtr <= '1;
+			else     APtr <= APtr + (aload == (avld && bload)? 0 : aload? 1 : -1);
+		end
+		always_ff @(posedge clk) begin
+			if(rst) begin
+				BDat <= 'x;
+				BVld <=  0;
+			end
+			else if(bload) begin
+				BDat <= ADat[APtr];
+				BVld <= avld;
+			end
+		end
+
+		assign	ovld = BVld;
+		for(genvar  pe = 0; pe < PE; pe++) begin
+			assign	odat[pe] = BDat[pe] + BIAS;
+		end
+	end : blkStreamOutput
+
+endmodule : thresholding
diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
new file mode 100644
index 0000000000..1f235b9486
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv
@@ -0,0 +1,164 @@
+/******************************************************************************
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the copyright holder nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @brief	All-AXI interface adapter for thresholding module.
+ * @author	Thomas B. Preußer <tpreusse@amd.com>
+ *
+ * @description
+ *	This AXI adapter fits the core thresholding functionality:
+ *	- with AXI stream data interfaces with flow control
+ *	- with implicit round-robin channel rotation as used by FINN, and
+ *	- performs aligned byte address to parameter word address translation.
+ *****************************************************************************/
+
+module thresholding_axi #(
+	int unsigned  N,		// output precision
+	int unsigned  K,		// input/threshold precision
+	int unsigned  C = 1,	// Channels
+	int unsigned  PE = 1,	// Processing Parallelism, requires C = k*PE
+
+	bit  SIGNED = 1,	// signed inputs
+	bit  FPARG  = 0,	// floating-point inputs: [sign] | exponent | mantissa
+	int  BIAS   = 0,	// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
+
+	// Initial Thresholds
+	parameter  THRESHOLDS_PATH = "",
+
+	bit  USE_AXILITE,	// Implement AXI-Lite for threshold read/write
+
+	// Force Use of On-Chip Memory Blocks
+	int unsigned  DEPTH_TRIGGER_URAM = 0,	// if non-zero, local mems of this depth or more go into URAM (prio)
+	int unsigned  DEPTH_TRIGGER_BRAM = 0,	// if non-zero, local mems of this depth or more go into BRAM
+	bit  DEEP_PIPELINE = 0,
+
+	localparam int unsigned  CF = C/PE,	// Channel Fold
+	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2,
+	localparam int unsigned  O_BITS = BIAS >= 0?
+		/* unsigned */ $clog2(2**N+BIAS) :
+		/* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS)
+)(
+	//- Global Control ------------------
+	input	logic  ap_clk,
+	input	logic  ap_rst_n,
+
+	//- AXI Lite ------------------------
+	// Writing
+	input	logic                  s_axilite_AWVALID,
+	output	logic                  s_axilite_AWREADY,
+	input	logic [ADDR_BITS-1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
+
+	input	logic         s_axilite_WVALID,
+	output	logic         s_axilite_WREADY,
+	input	logic [31:0]  s_axilite_WDATA,
+	input	logic [ 3:0]  s_axilite_WSTRB,
+
+	output	logic        s_axilite_BVALID,
+	input	logic        s_axilite_BREADY,
+	output	logic [1:0]  s_axilite_BRESP,
+
+	// Reading
+	input	logic                  s_axilite_ARVALID,
+	output	logic                  s_axilite_ARREADY,
+	input	logic [ADDR_BITS-1:0]  s_axilite_ARADDR,
+
+	output	logic         s_axilite_RVALID,
+	input	logic         s_axilite_RREADY,
+	output	logic [31:0]  s_axilite_RDATA,
+	output	logic [ 1:0]  s_axilite_RRESP,
+
+	//- AXI Stream - Input --------------
+	output	logic  s_axis_tready,
+	input	logic  s_axis_tvalid,
+	input	logic [((PE*K+7)/8)*8-1:0]  s_axis_tdata,
+
+	//- AXI Stream - Output -------------
+	input	logic  m_axis_tready,
+	output	logic  m_axis_tvalid,
+	output	logic [((PE*O_BITS+7)/8)*8-1:0]  m_axis_tdata
+);
+
+	//-----------------------------------------------------------------------
+	// AXI-lite Configuration Interface
+	uwire  cfg_en;
+	uwire  cfg_we;
+	uwire [ADDR_BITS-3:0]  cfg_a;
+	uwire [K        -1:0]  cfg_d;
+	uwire  cfg_rack;
+	uwire [K        -1:0]  cfg_q;
+
+	if(USE_AXILITE) begin	// AXI-Lite adapter drives the cfg_* request signals of the kernel
+		uwire [ADDR_BITS-1:0]  cfg_a0;	// address as delivered by the adapter, ADDR_BITS wide
+		axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi (
+			.aclk(ap_clk), .aresetn(ap_rst_n),
+
+			.awready(s_axilite_AWREADY), .awvalid(s_axilite_AWVALID), .awaddr(s_axilite_AWADDR), .awprot('x),
+			.wready(s_axilite_WREADY),   .wvalid(s_axilite_WVALID),   .wdata(s_axilite_WDATA),   .wstrb(s_axilite_WSTRB),
+			.bready(s_axilite_BREADY),   .bvalid(s_axilite_BVALID),   .bresp(s_axilite_BRESP),
+
+			.arready(s_axilite_ARREADY), .arvalid(s_axilite_ARVALID), .araddr(s_axilite_ARADDR), .arprot('x),
+			.rready(s_axilite_RREADY),   .rvalid(s_axilite_RVALID),   .rresp(s_axilite_RRESP),   .rdata(s_axilite_RDATA),
+
+			.ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a0), .ip_wdata(cfg_d),
+			.ip_rack(cfg_rack), .ip_rdata(cfg_q)
+		);
+		assign	cfg_a = cfg_a0[ADDR_BITS-3:0];	// kernel only takes the low ADDR_BITS-2 address bits
+		always_ff @(posedge ap_clk) begin	// simulation guard: the two dropped top bits must be zero on every request
+			assert(!ap_rst_n || !cfg_en || (cfg_a0[ADDR_BITS-2+:2] === 2'h0)) else begin
+				$error("%m: Spurious high address bits.");
+				$stop;
+			end
+		end
+	end
+	else begin	// no AXI-Lite: configuration is never enabled, remaining request signals are don't-care
+		assign	cfg_en =  0;
+		assign	cfg_we = 'x;
+		assign	cfg_a  = 'x;
+		assign	cfg_d  = 'x;
+	end
+
+	//-----------------------------------------------------------------------
+	// Kernel Implementation
+	thresholding #(
+		.N(N), .K(K), .C(C), .PE(PE),
+		.SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS),
+		.THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_CONFIG(USE_AXILITE),
+		.DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM),
+		.DEEP_PIPELINE(DEEP_PIPELINE)
+	) impl (
+		.clk(ap_clk), .rst(!ap_rst_n),
+
+		.cfg_en, .cfg_we, .cfg_a, .cfg_d,
+		.cfg_rack, .cfg_q,
+
+		.irdy(s_axis_tready), .ivld(s_axis_tvalid), .idat(s_axis_tdata),
+		.ordy(m_axis_tready), .ovld(m_axis_tvalid), .odat(m_axis_tdata)
+	);
+
+endmodule : thresholding_axi
diff --git a/finn-rtllib/thresholding/hdl/thresholding_template_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_template_wrapper.v
new file mode 100644
index 0000000000..3f0b012ef1
--- /dev/null
+++ b/finn-rtllib/thresholding/hdl/thresholding_template_wrapper.v
@@ -0,0 +1,120 @@
+/**
+ * Copyright (c) 2023, Xilinx
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ *   list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of FINN nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @author	Thomas B. Preußer <thomas.preusser@amd.com>
+ * @brief	Verilog wrapper for IP packaging.
+ */
+
+module thresholding_template_wrapper #(
+	parameter  N = $N$,	// output precision
+	parameter  K = $M$,	// input/threshold precision (note: the template placeholder is named M)
+	parameter  C = $C$,	// Channels
+	parameter  PE = $PE$,	// Processing Parallelism, requires C = k*PE
+
+	parameter  SIGNED = $SIGNED$,	// signed inputs
+	parameter  FPARG  = 0,			// floating-point inputs: [sign] | exponent | mantissa (fixed to 0 here, not templated)
+	parameter  BIAS   = $BIAS$,		// offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS]
+
+	parameter  THRESHOLDS_PATH = $THRESHOLDS_PATH$,	// Directory with initial threshold data
+	parameter  USE_AXILITE = $USE_AXILITE$,	// Implement AXI-Lite for threshold read/write
+
+	// Force Use of On-Chip Memory Blocks
+	parameter  DEPTH_TRIGGER_URAM = $DEPTH_TRIGGER_URAM$,	// if non-zero, local mems of this depth or more go into URAM (prio)
+	parameter  DEPTH_TRIGGER_BRAM = $DEPTH_TRIGGER_BRAM$,	// if non-zero, local mems of this depth or more go into BRAM
+	parameter  DEEP_PIPELINE = $DEEP_PIPELINE$,	// [bit] extra pipeline stages for easier timing closure
+
+	parameter  O_BITS = $O_BITS$	// output bits per PE; only sizes out_V_tdata below, it is not forwarded to the core
+)(
+	// Global Control
+	(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF s_axilite:in0_V:out_V, ASSOCIATED_RESET ap_rst_n" *)
+	(* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *)
+	input	ap_clk,
+	(* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *)
+	input	ap_rst_n,
+
+	//- AXI Lite ------------------------
+	// Writing
+	input   s_axilite_AWVALID,
+	output  s_axilite_AWREADY,
+	input [$clog2(C/PE) + $clog2(PE) + N + 1:0]  s_axilite_AWADDR,	// lowest 2 bits (byte selectors) are ignored
+
+	input         s_axilite_WVALID,
+	output        s_axilite_WREADY,
+	input [31:0]  s_axilite_WDATA,
+	input [ 3:0]  s_axilite_WSTRB,
+
+	output        s_axilite_BVALID,
+	input         s_axilite_BREADY,
+	output [1:0]  s_axilite_BRESP,
+
+	// Reading
+	input   s_axilite_ARVALID,
+	output  s_axilite_ARREADY,
+	input [$clog2(C/PE) + $clog2(PE) + N + 1:0]  s_axilite_ARADDR,	// same width as s_axilite_AWADDR
+
+	output         s_axilite_RVALID,
+	input          s_axilite_RREADY,
+	output [31:0]  s_axilite_RDATA,
+	output [ 1:0]  s_axilite_RRESP,
+
+	//- AXI Stream - Input --------------
+	output  in0_V_tready,
+	input   in0_V_tvalid,
+	input [((PE*K+7)/8)*8-1:0]  in0_V_tdata,	// PE*K payload bits rounded up to whole bytes
+
+	//- AXI Stream - Output -------------
+	input   out_V_tready,
+	output  out_V_tvalid,
+	output [((PE*O_BITS+7)/8)*8-1:0]  out_V_tdata	// PE*O_BITS payload bits rounded up to whole bytes
+);
+
+	thresholding_axi #(	// pure pass-through: parameters and ports are forwarded one-to-one
+		.N(N), .K(K), .C(C), .PE(PE),
+		.SIGNED(SIGNED),
+		.FPARG(FPARG),
+		.BIAS(BIAS),
+		.THRESHOLDS_PATH(THRESHOLDS_PATH),
+		.USE_AXILITE(USE_AXILITE),
+		.DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM),
+		.DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM),
+		.DEEP_PIPELINE(DEEP_PIPELINE)
+	) core (
+		.ap_clk(ap_clk), .ap_rst_n(ap_rst_n),
+
+		.s_axilite_AWVALID(s_axilite_AWVALID), .s_axilite_AWREADY(s_axilite_AWREADY), .s_axilite_AWADDR(s_axilite_AWADDR),
+		.s_axilite_WVALID(s_axilite_WVALID), .s_axilite_WREADY(s_axilite_WREADY), .s_axilite_WDATA(s_axilite_WDATA), .s_axilite_WSTRB(s_axilite_WSTRB),
+		.s_axilite_BVALID(s_axilite_BVALID), .s_axilite_BREADY(s_axilite_BREADY), .s_axilite_BRESP(s_axilite_BRESP),
+
+		.s_axilite_ARVALID(s_axilite_ARVALID), .s_axilite_ARREADY(s_axilite_ARREADY), .s_axilite_ARADDR(s_axilite_ARADDR),
+		.s_axilite_RVALID(s_axilite_RVALID), .s_axilite_RREADY(s_axilite_RREADY), .s_axilite_RDATA(s_axilite_RDATA), .s_axilite_RRESP(s_axilite_RRESP),
+		.s_axis_tready(in0_V_tready), .s_axis_tvalid(in0_V_tvalid), .s_axis_tdata(in0_V_tdata),
+		.m_axis_tready(out_V_tready), .m_axis_tvalid(out_V_tvalid), .m_axis_tdata(out_V_tdata)
+	);
+
+endmodule // thresholding_template_wrapper
diff --git a/finn-rtllib/thresholding/sim/thresh_gen.sv b/finn-rtllib/thresholding/sim/thresh_gen.sv
new file mode 100644
index 0000000000..a8a18be691
--- /dev/null
+++ b/finn-rtllib/thresholding/sim/thresh_gen.sv
@@ -0,0 +1,45 @@
+module thresh_gen;	// Simulation-only helper: writes one threshold memory image per (pe, stage) pair
+	localparam int unsigned  K = 9;	// bits per threshold value
+	localparam int unsigned  N = 4;	// number of stages; 2**N-1 thresholds per channel
+	localparam int unsigned  C = 6;	// number of channels
+
+	typedef logic [K-1:0]  thresh_t;
+	localparam thresh_t  THRESHOLDS[C][2**N-1] = '{	// one row per channel, ascending within each row
+		'{ 'h00, 'h01, 'h02, 'h03, 'h04, 'h05, 'h06, 'h07, 'h08, 'h09, 'h0a, 'h0b, 'h0c, 'h0d, 'h0e },
+		'{ 'h10, 'h11, 'h12, 'h13, 'h14, 'h15, 'h16, 'h17, 'h18, 'h19, 'h1a, 'h1b, 'h1c, 'h1d, 'h1e },
+		'{ 'h20, 'h21, 'h22, 'h23, 'h24, 'h25, 'h26, 'h27, 'h28, 'h29, 'h2a, 'h2b, 'h2c, 'h2d, 'h2e },
+		'{ 'h30, 'h31, 'h32, 'h33, 'h34, 'h35, 'h36, 'h37, 'h38, 'h39, 'h3a, 'h3b, 'h3c, 'h3d, 'h3e },
+		'{ 'h40, 'h41, 'h42, 'h43, 'h44, 'h45, 'h46, 'h47, 'h48, 'h49, 'h4a, 'h4b, 'h4c, 'h4d, 'h4e },
+		'{ 'h50, 'h51, 'h52, 'h53, 'h54, 'h55, 'h56, 'h57, 'h58, 'h59, 'h5a, 'h5b, 'h5c, 'h5d, 'h5e }
+	};
+	localparam  THRESHOLDS_PATH = ".";	// directory the .dat files are written to
+
+	localparam int unsigned  PE = 2;	// channels handled in parallel
+	localparam int unsigned  CF = C/PE;	// channel fold: channels served by each PE
+
+	for(genvar  stage = 0; stage < N; stage++) begin
+		localparam int unsigned  SN = N-1-stage;	// 2**SN-1 is the in-group offset of this stage's thresholds
+		for(genvar  pe = 0; pe < PE; pe++) begin
+			initial begin
+				automatic string  file = $sformatf("%s/threshs_%0d_%0d.dat", THRESHOLDS_PATH, pe, stage);
+
+				automatic thresh_t  threshs[CF * 2**stage];	// 2**stage entries for each of the CF folded channels
+				for(int unsigned  c = 0; c < CF; c++) begin
+					for(int unsigned  i = 0; i < 2**stage; i++) begin
+						threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1];	// channel c*PE+pe, i-th group of 2**(N-stage) thresholds
+					end
+				end
+
+				$writememh(file, threshs);	// dump the image as hex text
+			end
+		end
+	end
+
+    // Quit after running all initializers
+	initial begin
+		#1ns;
+		$display("Generation done.");
+		$finish;
+	end
+
+endmodule : thresh_gen
diff --git a/finn-rtllib/thresholding/sim/thresholding.tcl b/finn-rtllib/thresholding/sim/thresholding.tcl
new file mode 100644
index 0000000000..82dc59deb1
--- /dev/null
+++ b/finn-rtllib/thresholding/sim/thresholding.tcl
@@ -0,0 +1,17 @@
+create_project -force thresholding thresholding.vivado -part xcvc1902-vsva2197-2MP-e-S
+set_property board_part xilinx.com:vck190:part0:2.2 [current_project]
+
+read_verilog hdl/axilite_if.v
+read_verilog -sv { hdl/thresholding.sv hdl/thresholding_axi.sv }
+
+set simset [current_fileset -simset]
+set_property -name xsim.simulate.log_all_signals -value true -objects $simset
+set_property -name xsim.simulate.runtime -value all -objects $simset
+add_files -fileset $simset { sim/thresholding_tb.sv sim/thresholding_axi_tb.sv }
+
+foreach top { thresholding_tb thresholding_axi_tb } {
+	set_property top $top $simset
+
+	launch_simulation
+	close_sim
+}
diff --git a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
new file mode 100644
index 0000000000..918f539d15
--- /dev/null
+++ b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv
@@ -0,0 +1,314 @@
+/******************************************************************************
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *	 this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the copyright holder nor the names of its
+ *	 contributors may be used to endorse or promote products derived from
+ *	 this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @brief	Testbench for thresholding_axi.
+ * @author	Monica Chiosa <monica.chiosa@amd.com>
+ *
+ */
+
+module thresholding_axi_tb #(
+	int unsigned  N  = 4,	// output precision
+	int unsigned  C  = 6,	// number of channels
+	int unsigned  PE = 2,
+	real  M0 = 7.3,			// slope of the uniform thresholding line
+	real  B0 = 3.1,			// offset of the uniform thresholding line
+	bit  THROTTLED = 1,
+
+	localparam int unsigned  CF = C/PE,	// Channel Fold
+	localparam int unsigned  ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2
+);
+
+	//-----------------------------------------------------------------------
+	// Design Geometry
+
+	// For each channel = [0,channel):
+	//	 M_channel = M0 + CX*channel
+	//	 B_channel = B0 + CX*channel
+	// Input/threshold precision computed according to the maximum possible value
+	localparam real  CX = 1.375;
+	localparam int unsigned K = $clog2((2**N-1)*(M0+C*CX) + (B0+C*CX)); // unused sign + magnitude
+	localparam int unsigned C_BITS = C < 2? 1 : $clog2(C);
+
+	localparam int unsigned MST_STRM_WROUNDS = 503;
+
+	typedef int unsigned  threshs_t[C][2**N-1];
+	function threshs_t init_thresholds();	// builds the per-channel threshold table from the lines m*i + b
+		automatic threshs_t  res;
+		for(int unsigned  c = 0; c < C; c++) begin
+			automatic real  m = M0 + c*CX;	// slope for channel c
+			automatic real  b = B0 + c*CX;	// offset for channel c
+			foreach(res[c][i]) begin	// NOTE(review): confirm the tool binds `c` to the enclosing loop index, not a fresh foreach variable
+				res[c][i] = int'($ceil(m*i + b));	// round up to the next integer threshold
+			end
+		end
+		return  res;
+	endfunction : init_thresholds
+	localparam threshs_t  THRESHS = init_thresholds();
+
+	//-----------------------------------------------------------------------
+	// Clock and Reset Control
+	logic  clk = 0;
+	always #5ns clk = !clk;
+	logic  rst = 1;
+	initial begin
+		#10ns;
+		@(posedge clk);
+		rst <= 0;
+	end
+
+	//-----------------------------------------------------------------------
+	// DUT
+	logic                  s_axilite_AWVALID;
+	uwire                  s_axilite_AWREADY;
+	logic [ADDR_BITS-1:0]  s_axilite_AWADDR;	// lowest 2 bits (byte selectors) are ignored
+	logic                  s_axilite_WVALID;
+	uwire                  s_axilite_WREADY;
+	logic [         31:0]  s_axilite_WDATA;
+	uwire                  s_axilite_BVALID;
+	logic                  s_axilite_BREADY;
+	uwire [          1:0]  s_axilite_BRESP;
+	logic                  s_axilite_ARVALID;
+	uwire                  s_axilite_ARREADY;
+	logic [ADDR_BITS-1:0]  s_axilite_ARADDR;
+	uwire                  s_axilite_RVALID;
+	uwire                  s_axilite_RREADY = 1;
+	uwire [         31:0]  s_axilite_RDATA;
+	uwire [          1:0]  s_axilite_RRESP;
+
+	uwire  irdy;
+	logic  ivld;
+	logic [PE-1:0][K-1:0]  idat;
+
+	logic  ordy = 0;
+	uwire  ovld;
+	uwire [PE-1:0][N-1:0]  odat;
+
+	thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0), .USE_AXILITE(1)) dut (
+		.ap_clk(clk), .ap_rst_n(!rst),
+
+		// Configuration
+		.s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR,
+		.s_axilite_WVALID,  .s_axilite_WREADY,  .s_axilite_WDATA, .s_axilite_WSTRB('1),
+		.s_axilite_BVALID,  .s_axilite_BREADY,  .s_axilite_BRESP,
+		.s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR,
+		.s_axilite_RVALID,  .s_axilite_RREADY,  .s_axilite_RDATA, .s_axilite_RRESP,
+
+		// Stream Processing
+		.s_axis_tready(irdy), .s_axis_tvalid(ivld), .s_axis_tdata(idat),
+		.m_axis_tready(ordy), .m_axis_tvalid(ovld), .m_axis_tdata(odat)
+	);
+
+	//-----------------------------------------------------------------------
+	// Input Stimuli
+	typedef logic [PE-1:0][K-1:0]  input_t;
+	typedef logic [$clog2(CF)+$clog2(PE)+N-1:0]  addr_t;
+	input_t  QW[$];  // Input Feed Tracing
+	addr_t   QC[$];
+
+	int unsigned  error_cnt = 0;
+	bit  done = 0;
+	initial begin	// Main stimulus: program all thresholds, then stream data with random readbacks in parallel
+		// Report testbench details
+		$display("Testbench - thresholding K=%0d -> N=%0d", K, N);
+		for(int unsigned  c = 0; c < C; c++) begin
+			$write("Channel #%0d: Thresholds = {", c);
+			for(int unsigned  i = 0; i < 2**N-1; i++)  $write(" %0d", THRESHS[c][i]);
+			$display(" }");
+		end
+
+		// Config
+		s_axilite_AWVALID = 0;
+		s_axilite_AWADDR  = 'x;
+		s_axilite_WVALID  = 0;
+		s_axilite_WDATA   = 'x;
+		s_axilite_BREADY  = 0;
+		s_axilite_ARVALID = 0;
+		s_axilite_ARADDR  = 'x;
+
+		// Stream Input
+		ivld = 0;
+		idat = 'x;
+
+		@(posedge clk iff !rst);
+
+		// Threshold Configuration
+		for(int unsigned  c = 0; c < C; c+=PE) begin
+			automatic addr_t  addr = 0;	// word address laid out as { channel fold, pe, threshold index }
+			if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = c/PE;
+			for(int unsigned  pe = 0; pe < PE; pe++) begin
+				if(PE > 1)  addr[N+:$clog2(PE)] = pe;
+				for(int unsigned  t = 0; t < 2**N-1; t++) begin
+					addr[0+:N] = t;
+					fork	// drive the AW, W and B channels concurrently for one write
+						begin
+							s_axilite_AWVALID <= 1;
+							s_axilite_AWADDR  <= { addr, 2'b00 };
+							@(posedge clk iff s_axilite_AWREADY);
+							s_axilite_AWVALID <= 0;
+							s_axilite_AWADDR  <= 'x;
+						end
+						begin
+							s_axilite_WVALID <= 1;
+							s_axilite_WDATA  <= THRESHS[c+pe][t];
+							@(posedge clk iff s_axilite_WREADY);
+							s_axilite_WVALID <= 0;
+							s_axilite_WDATA  <= 'x;
+						end
+						begin
+							s_axilite_BREADY <= 1;
+							@(posedge clk iff s_axilite_BVALID);
+							assert(s_axilite_BRESP == '0) else begin
+								$error("Error on parameter write.");
+								$stop;
+							end
+							s_axilite_BREADY <= 0;
+						end
+					join
+				end
+			end
+		end
+
+		fork
+			// Intermittent configuration readback
+			while(!done) begin
+				if(($urandom()%37) != 0) begin
+					s_axilite_ARVALID <= 0;
+					s_axilite_ARADDR  <= 'x;
+					@(posedge clk);
+				end
+				else begin
+					automatic addr_t  addr = $urandom()%(2**N-1);	// any programmed threshold index 0 .. 2**N-2
+					if(PE > 1)  addr[N+:$clog2(PE)] = $urandom()%PE;
+					if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = $urandom()%CF;
+
+					s_axilite_ARVALID <= 1;
+					s_axilite_ARADDR  <= { addr, 2'b00 };
+					@(posedge clk iff s_axilite_ARREADY);
+
+					QC.push_back(addr);	// remember the request for the readback checker
+				end
+			end
+
+			// AXI4Stream MST Writes input values
+			repeat(MST_STRM_WROUNDS) begin
+				automatic input_t  dat;
+
+				while(THROTTLED && ($urandom()%7 == 0)) @(posedge clk);
+
+				std::randomize(dat);
+				ivld <= 1;
+				idat <= dat;
+				@(posedge clk iff irdy);
+				ivld <=  0;
+				idat <= 'x;
+				QW.push_back(dat);	// remember the accepted input for the output checker
+			end
+		join_any
+		done <= 1;
+		repeat(N+6)  @(posedge clk);	// grace period for in-flight outputs and readback replies
+
+		assert(QW.size() == 0) else begin
+			$error("Missing %0d outputs.", QW.size());
+			$stop;
+		end
+		assert(QC.size() == 0) else begin
+			$error("Missing %0d readback replies.", QC.size());
+			$stop;
+		end
+
+		$display("Test completed: %0d errors in %0d tests.", error_cnt, MST_STRM_WROUNDS);
+		$display("=========================================");
+		$finish;
+	end
+
+	// Output Checker -------------------------------------------------------
+
+	// Configuration Readback
+	always_ff @(posedge clk iff s_axilite_RVALID) begin	// runs once per clock edge on which a read response is valid
+		assert(s_axilite_RRESP == '0) else begin
+			$error("Read back error.");
+			$stop;
+		end
+		assert(QC.size()) begin	// a response must match a queued request; oldest request first
+			automatic addr_t  addr = QC.pop_front();
+			automatic int unsigned  cnl =	// channel = fold index * PE + pe index, decoded from the address fields
+				(CF == 1? 0 : addr[N+$clog2(PE)+:$clog2(CF)] * PE) +
+				(PE == 1? 0 : addr[N+:$clog2(PE)]);
+			automatic logic [K-1:0]  exp = THRESHS[cnl][addr[0+:N]];	// value programmed for this channel and threshold index
+			assert(s_axilite_RDATA == exp) else begin
+				$error("Readback mismatch on #%0d.%0d: %0d instead of %0d", cnl, addr[0+:N], s_axilite_RDATA, exp);
+				$stop;
+			end
+		end
+		else begin
+			$error("Spurious readback output.");
+			$stop;
+		end
+	end
+
+	// Stream Output
+	int unsigned  OCnl = 0;	// first channel of the PE group expected on the next output beat
+	always @(posedge clk) begin
+		if(rst) begin
+			OCnl <= 0;
+			ordy <= 1'b0;
+		end
+		else begin
+			if(!ordy || ovld)  ordy <= ($urandom()%5 != 0) || !THROTTLED;	// re-roll readiness unless ready and still waiting for data; always ready when not THROTTLED
+
+			if(ordy && ovld) begin	// an output beat is transferred in this cycle
+				assert(QW.size()) begin
+					automatic input_t  x = QW.pop_front();	// oldest accepted input belongs to this output
+
+					for(int unsigned  pe = 0; pe < PE; pe++) begin
+						automatic int unsigned  cnl = OCnl + pe;
+
+						$display("Mapped CNL=%0d DAT=%3d -> #%2d", cnl, x[pe], odat[pe]);
+						assert(	// output level o is correct iff THRESHS[o-1] <= x < THRESHS[o], with open ends at 0 and 2**N-1
+							((odat[pe] == 0) || (THRESHS[cnl][odat[pe]-1] <= x[pe])) &&
+							((odat[pe] == 2**N-1) || (x[pe] < THRESHS[cnl][odat[pe]]))
+						) else begin
+							$error("Output error on presumed input CNL=%0d DAT=0x%0x -> #%0d", cnl, x[pe], odat[pe]);
+							error_cnt++;
+							$stop;
+						end
+					end
+				end
+				else begin
+					$error("Spurious output.");
+					$stop;
+				end
+
+				OCnl <= (OCnl + PE)%C;	// round-robin to the next group of PE channels
+			end
+		end
+	end
+
+endmodule: thresholding_axi_tb
diff --git a/finn-rtllib/thresholding/sim/thresholding_tb.sv b/finn-rtllib/thresholding/sim/thresholding_tb.sv
new file mode 100644
index 0000000000..e42145f10e
--- /dev/null
+++ b/finn-rtllib/thresholding/sim/thresholding_tb.sv
@@ -0,0 +1,274 @@
+/******************************************************************************
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *	 this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the copyright holder nor the names of its
+ *	 contributors may be used to endorse or promote products derived from
+ *	 this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @brief	Testbench for thresholding.
+ * @author	Monica Chiosa <monica.chiosa@amd.com>
+ *
+ */
+
+module thresholding_tb #(
+	int unsigned  K  = 10,	// input precision
+	int unsigned  N  =  4,	// output precision
+	int unsigned  C  =  6,	// number of channels
+	int unsigned  PE =  2,
+
+	localparam int unsigned  CF = C/PE	// Channel Fold
+);
+    localparam bit  DEEP_PIPELINE = 1;
+
+	localparam int unsigned  MST_STRM_WROUNDS = 507;
+	localparam bit  THROTTLED = 1;
+
+	//-----------------------------------------------------------------------
+	// Clock and Reset Control
+	logic  clk = 0;
+	always #5ns clk = !clk;
+	logic  rst = 1;
+	initial begin
+		#10ns;
+		@(posedge clk);
+		rst <= 0;
+	end
+
+	//-----------------------------------------------------------------------
+	// Parallel Instances differing in Data Type
+	typedef logic [K -1:0]  val_t;
+	typedef val_t  threshs_t[C][2**N-1];
+	typedef val_t [PE-1:0]  input_t;
+	typedef logic [$clog2(CF)+$clog2(PE)+N-1:0]  addr_t;
+	logic [0:2]  term = '0;
+	always_comb begin
+		if(&term)  $finish;
+	end
+	for(genvar  i = 0; i < 3; i++) begin : genTypes
+		localparam bit  SIGNED = i>0;
+		localparam bit  FPARG  = i>1;
+
+		//- DUT -------------------------
+		logic  cfg_en;
+		logic  cfg_we;
+		logic [$clog2(C)+N-1:0]  cfg_a;
+		logic [K-1:0]  cfg_d;
+		uwire  cfg_rack;
+		uwire [K-1:0]  cfg_q;
+
+		uwire  irdy;
+		logic  ivld;
+		logic [PE-1:0][K-1:0]  idat;
+
+		logic  ordy = 0;
+		uwire  ovld;
+		uwire [PE-1:0][N-1:0]  odat;
+
+		thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .USE_CONFIG(1), .DEEP_PIPELINE(DEEP_PIPELINE)) dut (
+			.clk, .rst,
+
+			// Configuration
+			.cfg_en, .cfg_we, .cfg_a, .cfg_d,
+			.cfg_rack, .cfg_q,
+
+			// Stream Processing
+			.irdy, .ivld, .idat,
+			.ordy, .ovld, .odat
+		);
+
+		//- Stimulus Driver -------------
+		threshs_t  THRESHS;
+		function val_t sigord(input val_t  x);
+			automatic val_t  res = x;
+			if(SIGNED) begin
+				if(FPARG && x[K-1])  res[K-2:0] = ~x[K-2:0];
+				res[K-1] = !x[K-1];
+			end
+			return  res;
+		endfunction : sigord
+
+		input_t  QW[$];  // Input tracing
+		addr_t   QC[$];  // Readback tracking
+		int unsigned  error_cnt = 0;
+		bit  done = 0;
+		initial begin
+
+			// Generate thresholds
+			std::randomize(THRESHS);
+			foreach(THRESHS[c]) begin
+				val_t  row[2**N-1] = THRESHS[c];
+				row.sort with (sigord(item));
+				THRESHS[c] = row;
+			end
+
+			// Report test case details
+			$display("[%0d] Thresholding %s%s%0d -> uint%0d", i, SIGNED? "s" : "u", FPARG? "fp" : "int", K, N);
+			for(int unsigned  c = 0; c < C; c++) begin
+				$write("[%0d] Channel #%0d: Thresholds = {", i, c);
+				for(int unsigned  i = 0; i < 2**N-1; i++)  $write(" %0X", THRESHS[c][i]);
+				$display(" }");
+			end
+
+			// Config
+			cfg_en = 0;
+			cfg_we = 'x;
+			cfg_a  = 'x;
+			cfg_d  = 'x;
+
+			// Stream Input
+			ivld = 0;
+			idat = 'x;
+
+			@(posedge clk iff !rst);
+
+			// Threshold Configuration
+			cfg_en <= 1;
+			cfg_we <= 1;
+			for(int unsigned  c = 0; c < C; c+=PE) begin	// one channel fold (PE channels) per iteration
+				if(CF > 1)  cfg_a[N+$clog2(PE)+:$clog2(CF)] <= c/PE;	// fold index field
+				for(int unsigned  pe = 0; pe < PE; pe++) begin
+					if(PE > 1)  cfg_a[N+:$clog2(PE)] <= pe;	// nonblocking like all other cfg_* drives: no race with DUT sampling at this edge
+					for(int unsigned  t = 0; t < 2**N-1; t++) begin
+						cfg_a[0+:N] <= t;	// threshold index field
+						cfg_d <= THRESHS[c+pe][t];
+						@(posedge clk);	// one threshold written per clock cycle
+					end
+				end
+			end
+			cfg_d <= 'x;
+
+			fork
+				// Intermittent configuration readback
+				while(!done) begin	// runs until the stream writer branch sets done
+					cfg_en <= 0;
+					cfg_we <= 'x;
+					cfg_a  <= 'x;
+					@(posedge clk);
+					if(($urandom()%41) == 0) begin	// issue a read in roughly 1 of 41 cycles
+						automatic addr_t  addr = $urandom()%(2**N-1);	// any of the 2**N-1 thresholds of a channel
+						if(PE > 1)  addr[N+:$clog2(PE)] = $urandom()%PE;
+						if(CF > 1)  addr[N+$clog2(PE)+:$clog2(CF)] = $urandom()%CF;
+
+						cfg_en <= 1;
+						cfg_we <= 0;
+						cfg_a  <= addr;
+						@(posedge clk);
+						QC.push_back(addr);	// expected reply is checked by the Readback Checker
+					end
+				end
+
+				// AXI4Stream MST Writes input values
+				repeat(MST_STRM_WROUNDS) begin
+					automatic input_t  dat;
+
+					while(THROTTLED && ($urandom()%7 == 0)) @(posedge clk);
+
+					std::randomize(dat);
+					ivld <= 1;
+					idat <= dat;
+					@(posedge clk iff irdy);
+					ivld <=  0;
+					idat <= 'x;
+					QW.push_back(dat);
+				end
+			join_any
+			done <= 1;
+			repeat((DEEP_PIPELINE+1)*N+6)  @(posedge clk);
+
+			assert(QW.size() == 0) else begin
+				$error("[%0d] Missing %0d outputs.", i, QW.size());
+				$stop;
+			end
+			assert(QC.size() == 0) else begin
+				$error("[%0d] Missing %0d readback replies.", i, QC.size());
+				$stop;
+			end
+
+			$display("[%0d] Test completed: %0d errors in %0d tests.", i, error_cnt, MST_STRM_WROUNDS);
+			$display("=============================================");
+			term[i] <= 1;
+		end
+
+		//- Readback Checker --------------
+		always_ff @(posedge clk iff cfg_rack) begin
+			assert(QC.size()) begin
+				automatic addr_t  addr = QC.pop_front();
+				automatic int unsigned  cnl =
+					(CF == 1? 0 : addr[N+$clog2(PE)+:$clog2(CF)] * PE) +
+					(PE == 1? 0 : addr[N+:$clog2(PE)]);
+				automatic logic [K-1:0]  exp = THRESHS[cnl][addr[0+:N]];
+				assert(cfg_q == exp) else begin
+					$error("[%0d] Readback mismatch on #%0d.%0d: %0d instead of %0d", i, cnl, addr[0+:N], cfg_q, exp);
+					$stop;
+				end
+			end
+			else begin
+				$error("[%0d] Spurious readback output.", i);
+				$stop;
+			end
+		end
+
+		// Output Checker
+		int unsigned  OCnl = 0;
+		always @(posedge clk) begin
+			if(rst) begin
+				OCnl <= 0;
+				ordy <= 1'b0;
+			end
+			else begin
+				if(!ordy || ovld)  ordy <= ($urandom()%5 != 0) || !THROTTLED;
+
+				if(ordy && ovld) begin
+					assert(QW.size()) begin
+						automatic input_t  x = QW.pop_front();
+
+						for(int unsigned  pe = 0; pe < PE; pe++) begin
+							automatic int unsigned  cnl = OCnl + pe;
+
+							$display("[%0d] Mapped CNL=%0d DAT=%3x -> #%2d", i, cnl, x[pe], odat[pe]);
+							assert(
+								((odat[pe] == 0) || (sigord(THRESHS[cnl][odat[pe]-1]) <= sigord(x[pe]))) &&
+								((odat[pe] == 2**N-1) || (sigord(x[pe]) < sigord(THRESHS[cnl][odat[pe]])))
+							) else begin
+								$error("[%0d] Output error on presumed input CNL=%0d DAT=0x%0x -> #%0d", i, cnl, x[pe], odat[pe]);
+								error_cnt++;
+								$stop;
+							end
+						end
+					end
+					else begin
+						$error("[%0d] Spurious output.", i);
+						$stop;
+					end
+
+					OCnl <= (OCnl + PE)%C;
+				end
+			end
+		end
+
+	end : genTypes
+
+endmodule: thresholding_tb
diff --git a/finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl b/finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl
new file mode 100644
index 0000000000..338304fa40
--- /dev/null
+++ b/finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl
@@ -0,0 +1,187 @@
+
+# Loading additional proc with user specified bodies to compute parameter values.
+source [file join [file dirname [file dirname [info script]]] gui/thresholding_axi_v1_0.gtcl]
+
+# Definitional proc to organize widgets for parameters.
+proc init_gui { IPINST } {
+  ipgui::add_param $IPINST -name "Component_Name"
+  #Adding Page
+  set Page_0 [ipgui::add_page $IPINST -name "Page 0"]
+  ipgui::add_param $IPINST -name "ADDR_BITS" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "BIAS" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "C" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "CF" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "FPARG" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "K" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "N" -parent ${Page_0}
+  ipgui::add_param $IPINST -name "O_BITS" -parent ${Page_0}
+  set PE [ipgui::add_param $IPINST -name "PE" -parent ${Page_0}]
+  set_property tooltip {PE Count} ${PE}
+  ipgui::add_param $IPINST -name "SIGNED" -parent ${Page_0}
+
+
+}
+
+proc update_PARAM_VALUE.ADDR_BITS { PARAM_VALUE.ADDR_BITS PARAM_VALUE.C PARAM_VALUE.PE PARAM_VALUE.N } {
+	# Procedure called to update ADDR_BITS when any of the dependent parameters in the arguments change
+
+	set ADDR_BITS ${PARAM_VALUE.ADDR_BITS}
+	set C ${PARAM_VALUE.C}
+	set PE ${PARAM_VALUE.PE}
+	set N ${PARAM_VALUE.N}
+	set values(C) [get_property value $C]
+	set values(PE) [get_property value $PE]
+	set values(N) [get_property value $N]
+	set_property value [gen_USERPARAMETER_ADDR_BITS_VALUE $values(C) $values(PE) $values(N)] $ADDR_BITS
+}
+
+proc validate_PARAM_VALUE.ADDR_BITS { PARAM_VALUE.ADDR_BITS } {
+	# Procedure called to validate ADDR_BITS
+	return true
+}
+
+proc update_PARAM_VALUE.CF { PARAM_VALUE.CF PARAM_VALUE.C PARAM_VALUE.PE } {
+	# Procedure called to update CF when any of the dependent parameters in the arguments change
+
+	set CF ${PARAM_VALUE.CF}
+	set C ${PARAM_VALUE.C}
+	set PE ${PARAM_VALUE.PE}
+	set values(C) [get_property value $C]
+	set values(PE) [get_property value $PE]
+	set_property value [gen_USERPARAMETER_CF_VALUE $values(C) $values(PE)] $CF
+}
+
+proc validate_PARAM_VALUE.CF { PARAM_VALUE.CF } {
+	# Procedure called to validate CF
+	return true
+}
+
+proc update_PARAM_VALUE.O_BITS { PARAM_VALUE.O_BITS PARAM_VALUE.BIAS PARAM_VALUE.N } {
+	# Procedure called to update O_BITS when any of the dependent parameters in the arguments change
+
+	set O_BITS ${PARAM_VALUE.O_BITS}
+	set BIAS ${PARAM_VALUE.BIAS}
+	set N ${PARAM_VALUE.N}
+	set values(BIAS) [get_property value $BIAS]
+	set values(N) [get_property value $N]
+	set_property value [gen_USERPARAMETER_O_BITS_VALUE $values(BIAS) $values(N)] $O_BITS
+}
+
+proc validate_PARAM_VALUE.O_BITS { PARAM_VALUE.O_BITS } {
+	# Procedure called to validate O_BITS
+	return true
+}
+
+proc update_PARAM_VALUE.BIAS { PARAM_VALUE.BIAS } {
+	# Procedure called to update BIAS when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.BIAS { PARAM_VALUE.BIAS } {
+	# Procedure called to validate BIAS
+	return true
+}
+
+proc update_PARAM_VALUE.C { PARAM_VALUE.C } {
+	# Procedure called to update C when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.C { PARAM_VALUE.C } {
+	# Procedure called to validate C
+	return true
+}
+
+proc update_PARAM_VALUE.FPARG { PARAM_VALUE.FPARG } {
+	# Procedure called to update FPARG when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.FPARG { PARAM_VALUE.FPARG } {
+	# Procedure called to validate FPARG
+	return true
+}
+
+proc update_PARAM_VALUE.K { PARAM_VALUE.K } {
+	# Procedure called to update K when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.K { PARAM_VALUE.K } {
+	# Procedure called to validate K
+	return true
+}
+
+proc update_PARAM_VALUE.N { PARAM_VALUE.N } {
+	# Procedure called to update N when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.N { PARAM_VALUE.N } {
+	# Procedure called to validate N
+	return true
+}
+
+proc update_PARAM_VALUE.PE { PARAM_VALUE.PE } {
+	# Procedure called to update PE when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.PE { PARAM_VALUE.PE } {
+	# Procedure called to validate PE
+	return true
+}
+
+proc update_PARAM_VALUE.SIGNED { PARAM_VALUE.SIGNED } {
+	# Procedure called to update SIGNED when any of the dependent parameters in the arguments change
+}
+
+proc validate_PARAM_VALUE.SIGNED { PARAM_VALUE.SIGNED } {
+	# Procedure called to validate SIGNED
+	return true
+}
+
+
+proc update_MODELPARAM_VALUE.N { MODELPARAM_VALUE.N PARAM_VALUE.N } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.N}] ${MODELPARAM_VALUE.N}
+}
+
+proc update_MODELPARAM_VALUE.K { MODELPARAM_VALUE.K PARAM_VALUE.K } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.K}] ${MODELPARAM_VALUE.K}
+}
+
+proc update_MODELPARAM_VALUE.C { MODELPARAM_VALUE.C PARAM_VALUE.C } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.C}] ${MODELPARAM_VALUE.C}
+}
+
+proc update_MODELPARAM_VALUE.PE { MODELPARAM_VALUE.PE PARAM_VALUE.PE } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.PE}] ${MODELPARAM_VALUE.PE}
+}
+
+proc update_MODELPARAM_VALUE.SIGNED { MODELPARAM_VALUE.SIGNED PARAM_VALUE.SIGNED } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.SIGNED}] ${MODELPARAM_VALUE.SIGNED}
+}
+
+proc update_MODELPARAM_VALUE.FPARG { MODELPARAM_VALUE.FPARG PARAM_VALUE.FPARG } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.FPARG}] ${MODELPARAM_VALUE.FPARG}
+}
+
+proc update_MODELPARAM_VALUE.BIAS { MODELPARAM_VALUE.BIAS PARAM_VALUE.BIAS } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.BIAS}] ${MODELPARAM_VALUE.BIAS}
+}
+
+proc update_MODELPARAM_VALUE.CF { MODELPARAM_VALUE.CF PARAM_VALUE.CF } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.CF}] ${MODELPARAM_VALUE.CF}
+}
+
+proc update_MODELPARAM_VALUE.ADDR_BITS { MODELPARAM_VALUE.ADDR_BITS PARAM_VALUE.ADDR_BITS } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.ADDR_BITS}] ${MODELPARAM_VALUE.ADDR_BITS}
+}
+
+proc update_MODELPARAM_VALUE.O_BITS { MODELPARAM_VALUE.O_BITS PARAM_VALUE.O_BITS } {
+	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
+	set_property value [get_property value ${PARAM_VALUE.O_BITS}] ${MODELPARAM_VALUE.O_BITS}
+}
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 56d4230a3a..425b9bf4f6 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -59,6 +59,9 @@
 from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO
 from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch
 from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch
+from finn.custom_op.fpgadataflow.thresholding_binary_search import (
+    Thresholding_Binary_Search,
+)
 from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker
 from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour_Batch
 from finn.custom_op.fpgadataflow.vectorvectoractivation import VectorVectorActivation
@@ -80,6 +83,7 @@
 custom_op["Pool_Batch"] = Pool_Batch
 custom_op["FMPadding_Batch"] = FMPadding_Batch
 custom_op["Thresholding_Batch"] = Thresholding_Batch
+custom_op["Thresholding_Binary_Search"] = Thresholding_Binary_Search
 custom_op["AddStreams_Batch"] = AddStreams_Batch
 custom_op["LabelSelect_Batch"] = LabelSelect_Batch
 custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
new file mode 100755
index 0000000000..d02b778823
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py
@@ -0,0 +1,579 @@
+# Copyright (C) 2022, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import os
+import warnings
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import interleave_matrix_outer_dim_from_partitions
+
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+from finn.util.basic import find_next_power_of_2, get_rtlsim_trace_depth, make_build_dir
+from finn.util.data_packing import (
+    npy_to_rtlsim_input,
+    pack_innermost_dim_as_hex_string,
+    rtlsim_output_to_npy,
+)
+
+try:
+    from pyverilator import PyVerilator
+except ModuleNotFoundError:
+    PyVerilator = None
+
+"""@package thresholding_binary_search
+- ONNX i/o tensor shape assumptions for Thresholding:
+- input 0 is the input tensor, shape (..., NumChannels)
+- input 1 is the threshold tensor, shape (NumChannels, n_thres)
+- output 0 is the output tensor, shape (..., NumChannels) - same as input
+- the '...' here can be any shape (representing groups of vectors)
+
+This module creates an RTL IP, HLS is not supported. See 'thresholding_batch'
+for a HLS equivalent.
+"""
+
+
+class Thresholding_Binary_Search(HLSCustomOp):
+    """Class that corresponds to finn-rtllib 'thresholding' function."""
+
+    def __init__(self, onnx_node, **kwargs):
+        super().__init__(onnx_node, **kwargs)
+
+    def get_nodeattr_types(self):
+        my_attrs = {
+            # parallelization; channels thresholded per cycle
+            "PE": ("i", True, 0),
+            # number of channels (each may have different thresholds)
+            "NumChannels": ("i", True, 0),
+            # number of steps in thresholding function. Used only in decoupled mode
+            "numSteps": ("i", True, 1),
+            # FINN DataTypes for inputs, outputs
+            "inputDataType": ("s", True, ""),
+            "weightDataType": ("s", True, ""),
+            "outputDataType": ("s", True, ""),
+            # number of input vectors, examples:
+            # [1] is a single vector (like a FC layer with batch=1)
+            # [4] is four vectors (like a FC layer with batch=4)
+            # [1, 4, 4] is four * four vectors (like a conv layer with batch=1)
+            "numInputVectors": ("ints", False, [1]),
+            # name of the top module in verilog template. Used by PyVerilator
+            # and IPI generation
+            "gen_top_module": ("s", False, ""),
+            # bias to be applied to outputs of the node
+            "activation_bias": ("i", False, 0),
+        }
+        my_attrs.update(super().get_nodeattr_types())
+        return my_attrs
+
+    def calc_tmem(self):
+        """Calculates and returns TMEM."""
+        num_channels = self.get_nodeattr("NumChannels")
+        pe = self.get_nodeattr("PE")
+        return num_channels // pe
+
+    def make_shape_compatible_op(self, model):
+        oshape = self.get_normal_output_shape()
+        return super().make_const_shape_op(oshape)
+
+    def infer_node_datatype(self, model):
+        """Used for FINN DataType inference: set the output tensors' datatypes
+        accordingly for this node"""
+        node = self.onnx_node
+        idt = model.get_tensor_datatype(node.input[0])
+        if idt != self.get_input_datatype():
+            warn_str = "inputDataType changing for %s: %s -> %s " % (
+                node.name,
+                str(self.get_input_datatype().name),
+                str(idt.name),
+            )
+            warnings.warn(warn_str)
+        self.set_nodeattr("inputDataType", idt.name)
+        # set output datatype from property
+        odt = self.get_output_datatype()
+        model.set_tensor_datatype(node.output[0], odt)
+
+    def verify_node(self):
+        """Required by the FINN analysis module; meant to check that the node
+        has all attributes and inputs. Currently returns an empty list."""
+        return []
+
+    def bram_estimation(self):
+        return 0
+
+    def lut_estimation(self):
+        return 0
+
+    def get_input_datatype(self, ind=0):
+        return DataType[self.get_nodeattr("inputDataType")]
+
+    def get_output_datatype(self, ind=0):
+        return DataType[self.get_nodeattr("outputDataType")]
+
+    def get_weight_datatype(self):
+        """Threshold DataType; 'weights' and 'thresholds' are used interchangeably."""
+        return DataType[self.get_nodeattr("weightDataType")]
+
+    def minimize_accumulator_width(self, model):
+        "Minimize threshold width ('accumulator width' here due to convention)"
+        thresholds = model.get_initializer(self.onnx_node.input[1])
+        threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
+        min_threshold = thresholds.min()
+        max_threshold = thresholds.max()
+        min_input = self.get_input_datatype().min()
+        max_input = self.get_input_datatype().max()
+        # get range required by threshold values
+        tdt_min = min(min_input, min_threshold)
+        tdt_max = max(max_input, max_threshold)
+        if tdt_min < 0:
+            if abs(tdt_min) > tdt_max:
+                tdt = DataType.get_smallest_possible(tdt_min)
+            else:
+                tdt = DataType.get_smallest_possible(-tdt_max - 1)
+        else:
+            tdt = DataType.get_smallest_possible(tdt_max)
+        assert np.vectorize(tdt.allowed)(
+            threshold_tensor
+        ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
+        self.set_nodeattr("weightDataType", tdt.name)
+        return DataType[self.get_nodeattr("weightDataType")]
+
+    def get_instream_width(self, ind=0):
+        i_bits = self.get_input_datatype().bitwidth()
+        return i_bits * self.get_nodeattr("PE")
+
+    def get_outstream_width(self, ind=0):
+        o_bits = self.get_output_datatype().bitwidth()
+        return o_bits * self.get_nodeattr("PE")
+
+    def get_weightstream_width(self):
+        """Returns weight stream width"""
+        pe = self.get_nodeattr("PE")
+        wp = self.get_weight_datatype().bitwidth()
+        n_thres_steps = self.get_nodeattr("numSteps")
+        w_width = pe * wp * n_thres_steps
+        return w_width
+
+    def get_folded_input_shape(self, ind=0):
+        fold = self.calc_tmem()
+        pe = self.get_nodeattr("PE")
+        vecs = list(self.get_nodeattr("numInputVectors"))
+        folded_input_shape = tuple(vecs + [fold, pe])
+        return folded_input_shape
+
+    def get_folded_output_shape(self, ind=0):
+        # same shape as input
+        return self.get_folded_input_shape()
+
+    def get_normal_input_shape(self, ind=0):
+        num_channels = self.get_nodeattr("NumChannels")
+        vecs = list(self.get_nodeattr("numInputVectors"))
+        normal_input_shape = tuple(vecs + [num_channels])
+        return normal_input_shape
+
+    def get_normal_output_shape(self, ind=0):
+        # same shape as input
+        return self.get_normal_input_shape()
+
+    def get_number_output_values(self):
+        return 0
+
+    def get_exp_cycles(self):
+        return 0
+
+    def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
+        """Convert the original numpy threshold matrix orig_thres_matrix into
+        a form suitable for passing to the hlslib call:
+        * ensure NumChannels % PE == 0
+        * for unsigned inputs, ensure thresholds are nonnegative
+        * interleave rows between PEs
+        * reshape into (1, PE, TMEM, n_thres_steps) and return
+        """
+        mh = self.get_nodeattr("NumChannels")
+        pe = self.get_nodeattr("PE")
+        tmem = mh // pe
+        assert mh % pe == 0, "Requirement NumChannels divisable by PE is violated."
+        assert (
+            orig_thres_matrix.ndim == 2
+        ), """Threshold matrix dimension is
+        not as expected (2)."""
+        n_thres_steps = orig_thres_matrix.shape[1]
+        assert n_thres_steps == self.get_nodeattr(
+            "numSteps"
+        ), "Mismatch in threshold steps"
+        if not self.get_input_datatype().signed():
+            # ensure all thresholds are nonnegative
+            assert (orig_thres_matrix >= 0).all()
+        # ensure all thresholds are integer
+        assert np.equal(
+            np.mod(orig_thres_matrix, 1), 0
+        ).all(), "Need int threshold tensor"
+        ret = orig_thres_matrix
+        # ensure channels = mh , duplicating if necessary
+        if ret.shape[0] == 1:
+            ret = np.tile(ret, (mh, 1))
+        assert (
+            ret.shape[0] == mh
+        ), "Channels of threshold matrix are not as expected (mh)"
+        # distribute rows between PEs
+        ret = interleave_matrix_outer_dim_from_partitions(ret, pe)
+        assert (
+            ret.shape[0] == pe
+        ), """First dimension after distribution of the
+        rows between PEs is not as expected (pe)"""
+        assert (
+            ret.shape[1] == tmem
+        ), """Second dimension after distribution of the
+        rows between PEs is not as expected (tmem)"""
+        assert (
+            ret.shape[2] == n_thres_steps
+        ), """Third dimension after distribution of the
+        rows between PEs is not as expected (n_thres_steps)"""
+        return ret.reshape(1, pe, tmem, n_thres_steps)
+
+    def prepare_codegen_rtl_values(self):
+        """All dictionary values produced in this function are to replace
+        their key value(s) in the RTL template files"""
+        code_gen_dict = {}
+
+        # Identify the module name
+        code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [
+            self.get_verilog_top_module_name() + "_axi_wrapper"
+        ]
+        # Set the top module name - AXI wrapper
+        code_gen_dict["$TOP_MODULE$"] = code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"]
+
+        # Identify the module variables
+        output_data_type = self.get_nodeattr("outputDataType")  # output precision
+        input_data_type = self.get_nodeattr(
+            "inputDataType"
+        )  # input/threshold precision
+        num_channels = self.get_nodeattr("NumChannels")  # number of channels
+        bias = self.get_nodeattr("activation_bias")  # activation bias value
+        pe = self.get_nodeattr("PE")
+
+        code_gen_dict["$N$"] = [
+            str(DataType[output_data_type].bitwidth())
+        ]  # output precision - convert bitwidth to string
+        code_gen_dict["$M$"] = [
+            str(DataType[input_data_type].bitwidth())
+        ]  # input/threshold precision - convert bitwidth to string
+        code_gen_dict["$C$"] = [str(num_channels)]  # number of channels
+        code_gen_dict["$BIAS$"] = [str(bias)]  # activation bias value
+        code_gen_dict["$PE$"] = [str(pe)]  # requires C = M*PE
+
+        # Is the input datatype signed or unsigned?
+        # The thresholding core needs to know this when comparing weights to inputs
+        if self.get_input_datatype().signed():
+            code_gen_dict["$SIGNED$"] = [str(1)]
+        else:
+            code_gen_dict["$SIGNED$"] = [str(0)]
+
+        return code_gen_dict
+
+    def get_rtl_file_list(self):
+        """Thresholding binary search RTL file list"""
+        return ["thresholding.sv", "thresholding_axi.sv", "thresholding_axi_wrapper.v"]
+
+    def get_rtl_file_paths(self):
+        """Get full path of all RTL files"""
+        rtl_root_dir = os.environ["FINN_ROOT"] + "/finn-rtllib/thresholding/hdl/"
+        rtl_file_list = self.get_rtl_file_list()
+        rtl_file_paths = [rtl_root_dir + file for file in rtl_file_list]
+        return rtl_file_paths
+
+    def get_rtl_template_data(self, path):
+        """Return RTL file contents as a template"""
+        with open(path, "r") as f:
+            template = f.read()
+        return template
+
+    def fill_in_rtl_template_data(self, replace_dict, template_data):
+        """Use attribute values to fill in RTL template placeholders"""
+        template_data_cp = template_data
+        for key in replace_dict:
+            replacement_line = "\n".join(replace_dict[key])
+            template_data_cp = template_data_cp.replace(key, replacement_line)
+        return template_data_cp
+
+    def dump_rtl_data(self, dest_dir, filename, data):
+        """Dump filled-in-template RTL files for future synthesis step"""
+        with open(os.path.join(dest_dir, filename), "w") as f:
+            f.write(data)
+        return
+
+    def generate_hdl(self):
+        """Prepare HDL files from templates for synthesis"""
+        # Generate a dictionary of values to put in RTL template
+        code_gen_dict = self.prepare_codegen_rtl_values()
+
+        # Retrieve the destination directory for the final RTL files
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        for rtl_file_path in self.get_rtl_file_paths():
+            # read in original RTL template file
+            template_data = self.get_rtl_template_data(rtl_file_path)
+            # apply code generation to templates
+            data = self.fill_in_rtl_template_data(code_gen_dict, template_data)
+            # dump filled-in template to destination directory for compilation
+            file_only_path = rtl_file_path.split("/")[-1]
+            self.dump_rtl_data(code_gen_dir, file_only_path, data)
+
+        # Before we return - set the 'gen_top_module' attribute for use later
+        # by PyVerilator and IPI generation
+        self.set_nodeattr("gen_top_module", code_gen_dict["$TOP_MODULE$"][0])
+        return
+
+    def code_generation_ipgen(self, model, fpgapart, clk):
+        self.generate_hdl()
+
+        # point ipgen_path and ip_path at the generated RTL directory so that
+        # the HLS-Synth and stitch_ip transformations do not complain,
+        # i.e. during the HLSSynthIP() transformation
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        self.set_nodeattr("ipgen_path", code_gen_dir)
+        self.set_nodeattr("ip_path", code_gen_dir)
+        return
+
+    def prepare_rtlsim(self):
+        """Creates a Verilator emulation library for the RTL code generated
+        for this node, sets the rtlsim_so attribute to its path and returns
+        a PyVerilator wrapper around it."""
+
+        if PyVerilator is None:
+            raise ImportError("Installation of PyVerilator is required.")
+
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        verilog_paths = [code_gen_dir]
+        verilog_files = self.get_rtl_file_list()
+
+        # build the Verilator emulation library
+        sim = PyVerilator.build(
+            verilog_files,
+            build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"),
+            verilog_path=verilog_paths,
+            trace_depth=get_rtlsim_trace_depth(),
+            top_module_name=self.get_nodeattr("gen_top_module"),
+        )
+
+        # save generated lib filename in attribute
+        self.set_nodeattr("rtlsim_so", sim.lib._name)
+        return sim
+
+    def execute_node(self, context, graph):
+        """Simulate the node with rtlsim and write its output tensor into context."""
+        if self.get_nodeattr("exec_mode") != "rtlsim":
+            raise Exception(
+                "Invalid exec_mode value: {}; exec_mode must be set to '{}'".format(
+                    self.get_nodeattr("exec_mode"), "rtlsim"
+                )
+            )
+
+        node = self.onnx_node
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        # create a npy file for each input of the node (in_ind is input index)
+        in_ind = 0
+        for inputs in node.input:
+            # only the first input (the data tensor) is exported to a npy file;
+            # the second input (the thresholds) is not written out by this loop,
+            # and any input with an index above 2 is rejected below
+            if in_ind == 0:
+                assert (
+                    str(context[inputs].dtype) == "float32"
+                ), """Input datatype is
+                not float32 as expected."""
+                expected_inp_shape = self.get_folded_input_shape()
+                reshaped_input = context[inputs].reshape(expected_inp_shape)
+
+                if self.get_input_datatype() == DataType["BIPOLAR"]:
+                    # store bipolar activations as binary
+                    reshaped_input = (reshaped_input + 1) / 2
+                    export_idt = DataType["BINARY"]
+                else:
+                    export_idt = self.get_input_datatype()
+
+                # make copy before saving the array
+                reshaped_input = reshaped_input.copy()
+                np.save(
+                    os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)),
+                    reshaped_input,
+                )
+            elif in_ind > 2:
+                raise Exception("Unexpected input found for Thresholding_Binary_Search")
+            in_ind += 1
+
+        # Create a PyVerilator wrapper of the RTLSim .so
+        sim = self.get_rtlsim()
+        nbits = self.get_instream_width()
+        inp = npy_to_rtlsim_input(
+            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+        )
+
+        super().reset_rtlsim(sim)
+        super().toggle_clk(sim)
+
+        wnbits = self.get_weightstream_width()
+        export_wdt = self.get_weight_datatype()
+        wei = npy_to_rtlsim_input(
+            "{}/thresholds.npy".format(code_gen_dir), export_wdt, wnbits
+        )
+        num_w_reps = np.prod(self.get_nodeattr("numInputVectors"))
+        io_dict = {
+            "inputs": {"in0": inp, "weights": wei * num_w_reps},
+            "outputs": {"out": []},  # key must match the read-back below
+        }
+        self.rtlsim_multi_io(sim, io_dict)
+        output = io_dict["outputs"]["out"]
+
+        # Unpack the simulated output stream into a npy file
+        odt = self.get_output_datatype()
+        target_bits = odt.bitwidth()
+        packed_bits = self.get_outstream_width()
+        out_npy_path = "{}/output.npy".format(code_gen_dir)
+        out_shape = self.get_folded_output_shape()
+
+        rtlsim_output_to_npy(
+            output, out_npy_path, odt, out_shape, packed_bits, target_bits
+        )
+
+        # load and reshape output
+        output = np.load(out_npy_path)
+        oshape = self.get_normal_output_shape()
+        output = np.asarray([output], dtype=np.float32).reshape(*oshape)
+        context[node.output[0]] = output
+        return
+
+    def code_generation_ipi(self):
+        """Constructs and returns the TCL commands for node instantiation as an RTL
+        block."""
+        cmd = []
+        rtl_file_list = self.get_rtl_file_list()
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        for rtl_file in rtl_file_list:
+            cmd.append(
+                "add_files -norecurse %s" % (os.path.join(code_gen_dir, rtl_file))
+            )
+
+        # Create an RTL block, not an IP core (-type ip)
+        cmd.append(
+            "create_bd_cell -type module -reference %s %s"
+            % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)
+        )
+
+        return cmd
+
+    def get_verilog_top_module_intf_names(self):
+        """Return a dict of names of input and output interfaces.
+        The keys reflect the protocols each interface implements:
+        'clk', 'rst', 'm_axis', 's_axis', 'aximm', 'axilite'.
+        Values are lists of tuples (axis, aximm) or names (axilite):
+        'axis' tuples correspond to the list of node inputs in order,
+        each tuple is (interface_name, interface_width_bits).
+        axilite always assumed to be 32 bits and is not tuple (name only).
+        Each block must have at most one aximm and one axilite."""
+
+        intf_names = super().get_verilog_top_module_intf_names()
+        intf_names["axilite"] = ["s_axilite"]
+        return intf_names
+
+    def get_dynamic_config(self, model, address_stride=1):
+        """Returns a configuration dictionary containing axilite write commands
+        to program the thresholds at runtime: 'axilite_ch<c>_w<w>' -> (addr, value).
+        The default address stride for the weights is 1 byte."""
+
+        thresholds = model.get_initializer(self.onnx_node.input[1])
+        num_channels, num_weights_per_channel = thresholds.shape
+
+        weight_addr_boundary = find_next_power_of_2(num_weights_per_channel)
+        # Make sure that the next power of 2 (output) is not less than the input
+        assert weight_addr_boundary >= num_weights_per_channel
+
+        config = {}
+        channel_cntr = 0
+        for channel in thresholds:
+            channel_start_addr = channel_cntr * weight_addr_boundary * address_stride
+            weight_cntr = 0
+            addr = 0
+            for weight in channel:
+                key_name = "{}_{}{}_{}{}".format(
+                    "axilite", "ch", str(channel_cntr), "w", str(weight_cntr)
+                )
+                config[key_name] = (
+                    channel_start_addr + addr,
+                    int(
+                        str(
+                            pack_innermost_dim_as_hex_string(
+                                [weight],
+                                self.get_weight_datatype(),
+                                self.get_weight_datatype().bitwidth(),
+                            )
+                        ),
+                        0,
+                    ),
+                )
+
+                weight_cntr += 1
+                addr += address_stride
+
+            channel_cntr += 1
+
+        return config
+
+    def ipgen_singlenode_code(self):
+        """Normally: Builds the bash script for IP generation."""
+        """This is needed for the HLSSynthIP() transformation.
+        This is an IP, not a HLS node, so therefore provide an empty hook
+        to prevent any HLS synthesis."""
+        pass
+
+    def global_includes(self):
+        pass
+
+    def defines(self, var):
+        pass
+
+    def read_npy_data(self):
+        pass
+
+    def strm_decl(self):
+        pass
+
+    def docompute(self):
+        pass
+
+    def dataoutstrm(self):
+        pass
+
+    def save_as_npy(self):
+        pass
+
+    def blackboxfunction(self):
+        pass
+
+    def pragmas(self):
+        pass
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index ef02453498..a50cbbaed1 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -1019,9 +1019,10 @@ def apply(self, model):
 class InferThresholdingLayer(Transformation):
     """Convert any MultiThreshold into a standalone thresholding HLS layer."""
 
-    def __init__(self, mem_mode="const"):
+    def __init__(self, mem_mode="const", use_rtl_variant=False):
         super().__init__()
         self.mem_mode = mem_mode
+        self.use_rtl_variant = use_rtl_variant
 
     def apply(self, model):
         graph = model.graph
@@ -1073,27 +1074,65 @@ def apply(self, model):
                 )
                 actval = int(actval)
                 assert (not odt.signed()) or (actval < 0), (
-                    node.name + ": Signed output requres actval < 0"
-                )
-                # create and insert new Thresholding_Batch node
-                new_node = helper.make_node(
-                    "Thresholding_Batch",
-                    [thl_input, thl_threshold],
-                    [thl_output],
-                    domain="finn.custom_op.fpgadataflow",
-                    backend="fpgadataflow",
-                    NumChannels=ifc,
-                    PE=pe,
-                    numSteps=thl_thres_shape[1],
-                    inputDataType=idt.name,
-                    # weightDataType can be tightened by MinimizeAccumulatorWidth
-                    weightDataType=idt.name,
-                    outputDataType=odt.name,
-                    numInputVectors=list(thl_in_shape[:-1]),
-                    ActVal=actval,
-                    mem_mode=self.mem_mode,
-                    name="Thresholding_Batch_" + node.name,
+                    node.name + ": Signed output requires actval < 0"
                 )
+
+                # Ensure that RTL variant is not inserted for unsupported configuration
+                is_rtl_variant_compatible = True
+
+                # Perform checks for RTL variant if chosen
+                if self.use_rtl_variant:
+                    assert self.mem_mode == "decoupled", (
+                        """%s : RTL Thresholding only supports 'decoupled' memory
+                        mode."""
+                        % node.name
+                    )
+
+                if self.use_rtl_variant and is_rtl_variant_compatible:
+                    new_node = helper.make_node(
+                        "Thresholding_Binary_Search",
+                        [thl_input, thl_threshold],
+                        [thl_output],
+                        domain="finn.custom_op.fpgadataflow",
+                        backend="fpgadataflow",
+                        NumChannels=ifc,
+                        PE=pe,
+                        numSteps=thl_thres_shape[1],
+                        inputDataType=idt.name,
+                        weightDataType=idt.name,
+                        outputDataType=odt.name,
+                        numInputVectors=list(thl_in_shape[:-1]),
+                        activation_bias=actval,
+                        mem_mode=self.mem_mode,
+                        name="Thresholding_Binary_Search_" + node.name,
+                    )
+                else:
+                    if self.use_rtl_variant:
+                        warnings.warn(
+                            """%s : RTL Thresholding requested for unsupported
+                            configuration. Falling back to HLS implementation."""
+                            % node.name
+                        )
+
+                    # create and insert new Thresholding_Batch node
+                    new_node = helper.make_node(
+                        "Thresholding_Batch",
+                        [thl_input, thl_threshold],
+                        [thl_output],
+                        domain="finn.custom_op.fpgadataflow",
+                        backend="fpgadataflow",
+                        NumChannels=ifc,
+                        PE=pe,
+                        numSteps=thl_thres_shape[1],
+                        inputDataType=idt.name,
+                        weightDataType=idt.name,
+                        outputDataType=odt.name,
+                        numInputVectors=list(thl_in_shape[:-1]),
+                        ActVal=actval,
+                        mem_mode=self.mem_mode,
+                        name="Thresholding_Batch_" + node.name,
+                    )
+
                 graph.node.insert(insert_point, new_node)
                 # remove old node
                 graph.node.remove(node)
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 1796738c58..5252422dcf 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -228,3 +228,22 @@ def is_exe(fpath):
                 return exe_file
 
     return None
+
+
+def find_next_power_of_2(n):
+    """Return the smallest power of 2 that is >= n (2 for n == 1, 0 for n <= 0)"""
+    # Negative values will loop infinitely below - return 0
+    if n <= 0:
+        return 0
+    # If '1' is requested, output will be '0' in the loop below, avoid this now.
+    elif n == 1:
+        return 2  # i.e. 2**1
+
+    # decrement 'n' (to handle cases when `n` itself is a power of 2)
+    n = n - 1
+
+    # loop until only the most significant set bit is left
+    while n & n - 1:
+        # clear the lowest set bit
+        n = n & n - 1
+    return n << 1
diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
new file mode 100755
index 0000000000..9c233bdd06
--- /dev/null
+++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py
@@ -0,0 +1,276 @@
+# Copyright (C) 2023, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+from pyverilator.util.axi_utils import axilite_write, reset_rtlsim
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.multithreshold import multithreshold
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
+from test_fpgadataflow_thresholding_binary_search import (
+    make_single_thresholding_binary_search_modelwrapper,
+)
+
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.core.rtlsim_exec import rtlsim_exec
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+
+test_fpga_part = "xczu3eg-sbva484-1-e"
+target_clk_ns = 5
+
+
+# Helper functions
+def sort_thresholds_increasing(thresholds):
+    return np.sort(thresholds, axis=1)
+
+
+def generate_random_threshold_values(input_data_type, num_input_channels, num_steps):
+    return np.random.randint(
+        input_data_type.min(),
+        input_data_type.max() + 1,
+        (num_input_channels, num_steps),
+    ).astype(np.float32)
+
+
+def generate_pe_value(fold, num_input_channels):
+    if fold == -1:
+        fold = num_input_channels
+    pe = num_input_channels // fold
+    assert num_input_channels % pe == 0
+    return pe
+
+
+# n = batch, c = channel, h = height, w = width of feature map
+# Standard = NCHW; FINN = NHWC
+# Convert from NCHW to NHWC
+def convert_np_array_to_finn_data_layout(data):
+    return np.transpose(data, (0, 2, 3, 1))
+
+
+# n = batch, c = channel, h = height, w = width of feature map
+# Standard = NCHW; FINN = NHWC
+# Convert from NHWC to NCHW
+def convert_np_array_to_standard_data_layout(data):
+    return np.transpose(data, (0, 3, 1, 2))
+
+
+def make_single_multithresholding_modelwrapper(
+    thresholds,
+    pe,
+    input_data_type,
+    output_data_type,
+    activation_bias,
+    num_input_vecs,
+):
+    NumChannels = thresholds.shape[0]
+
+    inp = helper.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+
+    node_inp_list = ["inp", "thresh"]
+
+    Multithresholding_node = helper.make_node(
+        "MultiThreshold",
+        node_inp_list,
+        ["outp"],
+        domain="qonnx.custom_op.general",
+        out_dtype=output_data_type.name,
+        out_bias=float(activation_bias),
+        out_scale=1.0,
+    )
+
+    graph = helper.make_graph(
+        nodes=[Multithresholding_node],
+        name="multithresholding_graph",
+        inputs=[inp],
+        outputs=[outp],
+    )
+
+    model = helper.make_model(graph, producer_name="multithresholding-model")
+    model = ModelWrapper(model)
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    model = model.transform(GiveUniqueNodeNames())
+
+    model.set_tensor_datatype("inp", input_data_type)
+    model.set_tensor_datatype("outp", output_data_type)
+
+    model.set_tensor_datatype("thresh", input_data_type)
+    model.set_initializer("thresh", thresholds)
+    return model
+
+
+# N.B. Fold values where C % PE != 0 fail
+@pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
+@pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
+@pytest.mark.parametrize("fold", [-1, 1, 2, 4, 6])
+@pytest.mark.parametrize("num_input_channels", [16])
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
+def test_convert_to_hls_tbs_rtl_variant(
+    activation,
+    input_data_type,
+    fold,
+    num_input_channels,
+):
+    # Handle inputs to the test
+    pe = generate_pe_value(fold, num_input_channels)
+    num_steps = activation.get_num_possible_values() - 1
+
+    # See convert_to_hls_layers::InferThresholdingLayer:
+    # assert (not odt.signed()) or (actval < 0)
+    # This implies that it expects a negative activation, BIPOLAR does not provide that
+    if activation == DataType["BIPOLAR"]:
+        pytest.skip(
+            "Only negative activations are supported for "
+            "RTL Thresholding Binary Search node"
+        )
+
+    # Other non-input parameters
+    num_input_vecs = [1, 2, 2]
+    output_data_type = activation
+    if output_data_type == DataType["BIPOLAR"]:
+        activation_bias = 0
+    else:
+        activation_bias = output_data_type.min()
+
+    # generate random input data
+    tensor_shape = tuple(num_input_vecs + [num_input_channels])
+    x = gen_finn_dt_tensor(input_data_type, tensor_shape)
+
+    # Generate random thresholds and sort in ascending order
+    thresholds = generate_random_threshold_values(
+        input_data_type, num_input_channels, num_steps
+    )
+
+    # provide non-decreasing/ascending thresholds
+    thresholds = sort_thresholds_increasing(thresholds)
+
+    x_nhwc = convert_np_array_to_standard_data_layout(x)
+    y = multithreshold(x_nhwc, thresholds)
+
+    # convert back to NHWC for comparison to hw outputs
+    y = convert_np_array_to_finn_data_layout(y)
+    if activation == DataType["BIPOLAR"]:
+        # binary to bipolar
+        y = 2 * y - 1
+    else:
+        # signed offset
+        y += activation.min()
+
+    # Generate model from input parameters to the test
+    model = make_single_thresholding_binary_search_modelwrapper(
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        num_input_vecs,
+    )
+
+    model = model.transform(InsertFIFO(True))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+
+    # Retrieve the axilite programming sequence for weights - for decoupled mode only
+    tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
+    tbs_inst = getCustomOp(tbs_node)
+    config = tbs_inst.get_dynamic_config(model, 4)
+
+    # Reshape generated data (not from model)
+    oshape = model.get_tensor_shape("outp")
+    y_expected = y.reshape(oshape)
+
+    # Helper function that delivers the hook to program the thresholds via AXI-Lite
+    def config_hook(config):
+        if config is None:
+            return None
+
+        def write_thresh_config(sim):
+            # axi_name = "s_axilite_0_" # works
+            axi_name = getCustomOp(
+                model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
+            ).get_verilog_top_module_intf_names()["axilite"][0]
+            axi_name += "_0_"
+
+            # Write config registers to the Threshold memory.
+            # The dictionary defines (addr, value) tuples.
+            for config_entry in config.values():
+                addr = config_entry[0]
+                val = config_entry[1]
+                axilite_write(sim, addr, val, basename=axi_name)
+
+            reset_rtlsim(sim)
+
+        return write_thresh_config
+
+    input_dict = {"inp": x}
+    rtlsim_exec(model, input_dict, pre_hook=config_hook(config))
+    y_produced = input_dict["outp"]
+    assert (y_produced == y_expected).all()
+
+    # Make a Multithreshold graph and convert to thresholding binary search node
+    new_model = make_single_multithresholding_modelwrapper(
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        num_input_vecs,
+    )
+
+    # Recreate the model using the ConvertToHLS transform
+    new_model = new_model.transform(
+        to_hls.InferThresholdingLayer(mem_mode="decoupled", use_rtl_variant=True)
+    )
+    new_model = new_model.transform(InsertFIFO(True))
+    new_model = new_model.transform(GiveUniqueNodeNames())
+    new_model = new_model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    new_model = new_model.transform(HLSSynthIP())
+    new_model = new_model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+
+    input_dict = {"inp": x}
+    rtlsim_exec(new_model, input_dict, pre_hook=config_hook(config))
+    y_produced_new = input_dict["outp"]
+    assert (y_produced_new == y_expected).all()
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
new file mode 100755
index 0000000000..24b60f5ea5
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py
@@ -0,0 +1,287 @@
+# Copyright (C) 2022, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+from pyverilator.util.axi_utils import axilite_write, reset_rtlsim
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.multithreshold import multithreshold
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
+
+from finn.core.rtlsim_exec import rtlsim_exec
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+
+test_fpga_part = "xczu3eg-sbva484-1-e"
+target_clk_ns = 5
+
+
+# Helper functions
+def sort_thresholds_increasing(thresholds):
+    return np.sort(thresholds, axis=1)
+
+
+def generate_random_threshold_values(input_data_type, num_input_channels, num_steps):
+    return np.random.randint(
+        input_data_type.min(),
+        input_data_type.max() + 1,
+        (num_input_channels, num_steps),
+    ).astype(np.float32)
+
+
+def generate_pe_value(fold, num_input_channels):
+    if fold == -1:
+        fold = num_input_channels
+    pe = num_input_channels // fold
+    assert num_input_channels % pe == 0
+    return pe
+
+
+# n = batch, c = channel, h = height, w = width of feature map
+# Standard = NCHW; FINN = NHWC
+# Convert from NCHW to NHWC
+def convert_np_array_to_finn_data_layout(data):
+    return np.transpose(data, (0, 2, 3, 1))
+
+
+# n = batch, c = channel, h = height, w = width of feature map
+# Standard = NCHW; FINN = NHWC
+# Convert from NHWC to NCHW
+def convert_np_array_to_standard_data_layout(data):
+    return np.transpose(data, (0, 3, 1, 2))
+
+
+def make_single_thresholding_binary_search_modelwrapper(
+    thresholds,
+    pe,
+    input_data_type,
+    output_data_type,
+    activation_bias,
+    num_input_vecs,
+):
+
+    NumChannels = thresholds.shape[0]
+
+    inp = helper.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, num_input_vecs + [NumChannels]
+    )
+
+    node_inp_list = ["inp", "thresh"]
+
+    Thresholding_node = helper.make_node(
+        "Thresholding_Binary_Search",
+        node_inp_list,
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        NumChannels=NumChannels,
+        PE=pe,
+        numSteps=thresholds.shape[1],
+        inputDataType=input_data_type.name,
+        weightDataType=input_data_type.name,
+        outputDataType=output_data_type.name,
+        activation_bias=activation_bias,
+        numInputVectors=num_input_vecs,
+    )
+    graph = helper.make_graph(
+        nodes=[Thresholding_node],
+        name="thresholding_graph",
+        inputs=[inp],
+        outputs=[outp],
+    )
+
+    model = helper.make_model(graph, producer_name="thresholding-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", input_data_type)
+    model.set_tensor_datatype("outp", output_data_type)
+
+    model.set_tensor_datatype("thresh", input_data_type)
+    model.set_initializer("thresh", thresholds)
+    return model
+
+
+# Test brief: Test that PrepareRTLSim() runs successfully. This function is not
+# tested in test_fpgadataflow_thresholding_binary_search()
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
+def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim():
+    input_data_type = DataType["INT16"]
+    act = DataType["INT4"]
+    fold = -1
+    num_input_channels = 16
+
+    # Handle inputs to the test
+    pe = generate_pe_value(fold, num_input_channels)
+    num_steps = act.get_num_possible_values() - 1
+
+    # Generate random, non-decreasing thresholds
+    thresholds = generate_random_threshold_values(
+        input_data_type, num_input_channels, num_steps
+    )
+    thresholds = sort_thresholds_increasing(thresholds)
+
+    # Other non-input parameters
+    num_input_vecs = [1, 2, 2]
+    output_data_type = act
+    if output_data_type == DataType["BIPOLAR"]:
+        activation_bias = 0
+    else:
+        activation_bias = output_data_type.min()
+
+    # Generate model from input parameters to the test
+    model = make_single_thresholding_binary_search_modelwrapper(
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        num_input_vecs,
+    )
+
+    model = model.transform(SetExecMode("rtlsim"))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(PrepareRTLSim())
+    return
+
+
+# Test brief: Create a Thresholding binary search layer using various parameters
+# and test against a SW generated & simulated dataset
+# N.B. Fold values where C % PE != 0 fail
+@pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
+@pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]])
+@pytest.mark.parametrize("fold", [-1, 1, 2, 4, 6])
+@pytest.mark.parametrize("num_input_channels", [16])
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
+@pytest.mark.slow
+def test_fpgadataflow_thresholding_binary_search(
+    activation, input_data_type, fold, num_input_channels
+):
+    # Handle inputs to the test
+    pe = generate_pe_value(fold, num_input_channels)
+    num_steps = activation.get_num_possible_values() - 1
+
+    # Other non-input parameters
+    num_input_vecs = [1, 2, 2]
+    output_data_type = activation
+    if output_data_type == DataType["BIPOLAR"]:
+        activation_bias = 0
+    else:
+        activation_bias = output_data_type.min()
+
+    # generate random input data
+    tensor_shape = tuple(num_input_vecs + [num_input_channels])
+    x = gen_finn_dt_tensor(input_data_type, tensor_shape)
+
+    # Generate random thresholds and sort in ascending order
+    thresholds = generate_random_threshold_values(
+        input_data_type, num_input_channels, num_steps
+    )
+
+    # provide non-decreasing/ascending thresholds
+    thresholds = sort_thresholds_increasing(thresholds)
+
+    x_nhwc = convert_np_array_to_standard_data_layout(x)
+    y = multithreshold(x_nhwc, thresholds)
+
+    # convert back to NHWC for comparison to hw outputs
+    y = convert_np_array_to_finn_data_layout(y)
+    if activation == DataType["BIPOLAR"]:
+        # binary to bipolar
+        y = 2 * y - 1
+    else:
+        # signed offset
+        y += activation.min()
+
+    # Generate model from input parameters to the test
+    model = make_single_thresholding_binary_search_modelwrapper(
+        thresholds,
+        pe,
+        input_data_type,
+        output_data_type,
+        activation_bias,
+        num_input_vecs,
+    )
+
+    model = model.transform(InsertFIFO(True))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+
+    # Retrieve the axilite programming sequence for weights - for decoupled mode only
+    tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
+    tbs_inst = getCustomOp(tbs_node)
+    config = tbs_inst.get_dynamic_config(model, 4)
+
+    # Reshape generated data (not from model)
+    oshape = model.get_tensor_shape("outp")
+    y_expected = y.reshape(oshape)
+
+    # Helper function that delivers the hook to program the thresholds via AXI-Lite
+    def config_hook(config):
+        if config is None:
+            return None
+
+        def write_thresh_config(sim):
+            # axi_name = "s_axilite_0_" # works
+            axi_name = getCustomOp(
+                model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]
+            ).get_verilog_top_module_intf_names()["axilite"][0]
+            axi_name += "_0_"
+
+            # Write config registers to the Threshold memory.
+            # The dictionary defines (addr, value) tuples.
+            for config_entry in config.values():
+                addr = config_entry[0]
+                val = config_entry[1]
+                axilite_write(sim, addr, val, basename=axi_name)
+
+            reset_rtlsim(sim)
+
+        return write_thresh_config
+
+    input_dict = {"inp": x}
+    rtlsim_exec(model, input_dict, pre_hook=config_hook(config))
+    y_produced = input_dict["outp"]
+    assert (y_produced == y_expected).all()
diff --git a/tests/util/test_basic.py b/tests/util/test_basic.py
new file mode 100755
index 0000000000..97a8c50261
--- /dev/null
+++ b/tests/util/test_basic.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2023, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import finn.util.basic as basic
+
+
+@pytest.mark.util
+def test_next_power_of_2():
+    """Check basic.find_next_power_of_2 against a table of known results."""
+    cases = [
+        {"input": -2, "expected_result": 0},
+        {"input": -1, "expected_result": 0},
+        {"input": 0, "expected_result": 0},
+        {"input": 1, "expected_result": 2},
+        {"input": 2, "expected_result": 2},
+        {"input": 3, "expected_result": 4},
+        {"input": 4, "expected_result": 4},
+        {"input": 7, "expected_result": 8},
+        {"input": 8, "expected_result": 8},
+        {"input": 11, "expected_result": 16},
+        {"input": 15, "expected_result": 16},
+        {"input": 16, "expected_result": 16},
+        {"input": 18, "expected_result": 32},
+        {"input": 27, "expected_result": 32},
+        {"input": 31, "expected_result": 32},
+        {"input": 32, "expected_result": 32},
+        {"input": 42, "expected_result": 64},
+        {"input": 65, "expected_result": 128},
+    ]
+    for case in cases:
+        result = basic.find_next_power_of_2(case["input"])
+        assert result >= case["input"]  # result is never below the request
+        assert result == case["expected_result"]