From 3002e6239903af9f9f9444ef3fbbb8935ba2bb92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 20 Sep 2022 11:08:59 +0100 Subject: [PATCH 001/111] HDL for new thresholding by binary search. --- finn-rtllib/thresholding/hdl/thresholding.sv | 153 ++++++++++++++ .../thresholding/hdl/thresholding_axi.sv | 198 ++++++++++++++++++ .../hdl/thresholding_axi_wrapper.v | 122 +++++++++++ 3 files changed, 473 insertions(+) create mode 100644 finn-rtllib/thresholding/hdl/thresholding.sv create mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi.sv create mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv new file mode 100644 index 0000000000..93ccdc51c5 --- /dev/null +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -0,0 +1,153 @@ +/****************************************************************************** + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Pipelined thresholding by binary search. + * @author Thomas B. Preußer + * + * @description + * Produces the N-bit count of those among 2^N-1 thresholds that are not + * larger than the corresponding input: + * y = Σ(T_i <= x) + * The result is computed by binary search. The runtime-configurable + * thresholds must be written in ascending order: + * i < j => T_i < T_j + * The design supports channel folding allowing each input to be processed + * with respect to a selectable set of thresholds. The corresponding + * threshold configuration relies on a channel address prefix. Inputs are + * accompanied by a channel selector. + *****************************************************************************/ +module thresholding #( + int unsigned N, // output precision + int unsigned M, // input/threshold precision + int unsigned C, // number of channels + + localparam int unsigned C_BITS = C < 2? 
1 : $clog2(C) +)( + // Global Control + input logic clk, + input logic rst, + + // Threshold Configuration + input logic twe, + input logic [$clog2(C)+N-1:0] twa, + input logic [ M-1:0] twd, + + // Clock Enable for Stream Processing + input logic en, + + // Input Stream + input logic ivld, + input logic [C_BITS-1:0] icnl, // Ignored for C == 1 + input logic [M -1:0] idat, + + // Output Stream + output logic ovld, + output logic [C_BITS-1:0] ocnl, + output logic [N -1:0] odat +); + + // Pipeline Links & Feed + typedef struct packed { + logic vld; // Valid data identification + logic [C_BITS-1:0] cnl; // Channel + logic [M -1:0] val; // Original input value + logic [0:N-1] res; // Assembling result with valid prefix [0:stage] after stage #stage + } pipe_t; + uwire pipe_t pipe[0:N]; + assign pipe[0] = pipe_t'{ vld: ivld, cnl: icnl, val: idat, res: {N{1'bx}} }; // Feed original input + + // Stages: 0, 1, ..., N-1 + uwire [0:N-1] tws = (twa[N-1:0]+1) & ~twa[N-1:0]; // Write Select per stage by address suffix + for(genvar stage = 0; stage < N; stage++) begin : genStages + + // Threshold Memory + uwire [M-1:0] thresh; + if(1) begin : blkUpdate + + // Write control: local select from global address + uwire we = twe && tws[stage]; + if((C == 1) && (stage == 0)) begin + logic [M-1:0] Thresh = 'x; + always_ff @(posedge clk) begin + if(rst) Thresh <= 'x; + else if(we) Thresh <= twd; + end + assign thresh = Thresh; + end + else begin + logic [M-1:0] Threshs[C * 2**stage]; + uwire [$clog2(C)+stage-1:0] wa = twa[$left(twa):N-stage]; + uwire [$clog2(C)+stage-1:0] ra; + if(C > 1) assign ra[stage+:C_BITS] = pipe[stage].cnl; + if(stage) assign ra[stage-1:0] = pipe[stage].res[0:stage-1]; + + // Write + always_ff @(posedge clk) begin + if(we) Threshs[wa] <= twd; + end + + // Read + logic [M-1:0] RdReg; + always_ff @(posedge clk) begin + if(en) RdReg <= Threshs[ra]; + end + assign thresh = RdReg; + end + + end : blkUpdate + + // Pipeline regs simply copying the input + pipe_t State = '{ 
vld: 0, cnl: 'x, val: 'x, res: 'x }; + always_ff @(posedge clk) begin + if(rst) State <= '{ vld: 0, cnl: 'x, val: 'x, res: 'x }; + else if(en) State <= pipe[stage]; + end + + // Assemble pipeline data + logic [0:N-1] res; + always_comb begin + res = State.res; + res[stage] = thresh <= State.val; // Patch in next result bit + end + assign pipe[stage+1] = '{ + vld: State.vld, + cnl: State.cnl, + val: State.val, + res: res + }; + + end : genStages + + // Output + assign ovld = pipe[N].vld; + assign ocnl = pipe[N].cnl; + assign odat = pipe[N].res; + +endmodule : thresholding diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv new file mode 100644 index 0000000000..71e54c5ca0 --- /dev/null +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -0,0 +1,198 @@ +/****************************************************************************** + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief All-AXI interface adapter for thresholding module. + * @author Thomas B. Preußer + *****************************************************************************/ + +module thresholding_axi #( + int unsigned N, // output precision + int unsigned M, // input/threshold precision + int unsigned C // Channels +)( + //- Global Control ------------------ + input logic ap_clk, + input logic ap_rst_n, + + //- AXI Lite ------------------------ + // Writing + input logic s_axilite_AWVALID, + output logic s_axilite_AWREADY, + input logic [$clog2(C)+N-1:0] s_axilite_AWADDR, + + input logic s_axilite_WVALID, + output logic s_axilite_WREADY, + input logic [31:0] s_axilite_WDATA, + input logic [ 3:0] s_axilite_WSTRB, + + output logic s_axilite_BVALID, + input logic s_axilite_BREADY, + output logic [1:0] s_axilite_BRESP, + + // Reading + input logic s_axilite_ARVALID, + output logic s_axilite_ARREADY, + input logic [0:0] s_axilite_ARADDR, + + output logic s_axilite_RVALID, + input logic s_axilite_RREADY, + output logic [31:0] s_axilite_RDATA, + output logic [ 1:0] s_axilite_RRESP, + + //- AXI Stream - Input -------------- + output logic s_axis_tready, + input logic s_axis_tvalid, + input logic [((M+7)/8)*8-1:0] s_axis_tdata, + + //- AXI Stream - Output ------------- + input logic m_axis_tready, + output logic m_axis_tvalid, + output logic [((N+7)/8)*8-1:0] m_axis_tdata +); + //- Global Control 
------------------------------------------------------ + uwire clk = ap_clk; + uwire rst = !ap_rst_n; + + //- AXI Lite: Threshold Configuration ----------------------------------- + uwire twe; + uwire [$clog2(C)+N-1:0] twa; + uwire [ M-1:0] twd; + if(1) begin : blkAxiLite + logic WABusy = 0; + logic WDBusy = 0; + logic [$clog2(C)+N-1:0] Addr = 'x; + logic [ M-1:0] Data = 'x; + + assign twe = WABusy && WDBusy; + assign twa = Addr; + assign twd = Data; + + uwire clr_wr = rst || (twe && s_axilite_BREADY); + always_ff @(posedge clk) begin : blockName + if(clr_wr) begin + WABusy <= 0; + Addr <= 'x; + WDBusy <= 0; + Data <= 'x; + end + else begin + if(!WABusy) begin + WABusy <= s_axilite_AWVALID; + Addr <= s_axilite_AWADDR[$clog2(C)+N-1:0]; + end + if(!WDBusy) begin + WDBusy <= s_axilite_WVALID; + Data <= s_axilite_WDATA[M-1:0]; + end + end + end + assign s_axilite_AWREADY = !WABusy; + assign s_axilite_WREADY = !WDBusy; + assign s_axilite_BVALID = WABusy && WDBusy; + assign s_axilite_BRESP = '0; // OK + + // Answer all reads with '1 + logic RValid = 0; + uwire clr_rd = rst || (RValid && s_axilite_RREADY); + always_ff @(posedge clk) begin + if(clr_rd) RValid <= 0; + else if(!RValid) RValid <= s_axilite_ARVALID; + end + assign s_axilite_ARREADY = !RValid; + assign s_axilite_RVALID = RValid; + assign s_axilite_RDATA = '1; + assign s_axilite_RRESP = '0; // OK + + end : blkAxiLite + + //- IO-Sandwich with two-stage output buffer for containing a local enable + uwire en; + uwire [N-1:0] odat; + uwire ovld; + if(1) begin : blkOutputDecouple + typedef struct { + logic vld; + logic [N-1:0] dat; + } buf_t; + buf_t Buf[2] = '{ default: '{ vld: 0, dat: 'x } }; + always_ff @(posedge clk) begin + if(rst) Buf <= '{ default: '{ vld: 0, dat: 'x } }; + else begin + if(!Buf[1].vld || m_axis_tready) begin + Buf[1] <= '{ + vld: Buf[0].vld || ovld, + dat: Buf[0].vld? 
Buf[0].dat : odat + }; + end + Buf[0].vld <= Buf[1].vld && !m_axis_tready && (Buf[0].vld || ovld); + if(!Buf[0].vld) Buf[0].dat <= odat; + end + end + assign en = !Buf[0].vld; + + assign m_axis_tvalid = Buf[1].vld; + assign m_axis_tdata = Buf[1].dat; + + end : blkOutputDecouple + + localparam int unsigned C_BITS = C < 2? 1 : $clog2(C); + uwire ivld = s_axis_tvalid; + uwire [C_BITS-1:0] icnl; + uwire [M -1:0] idat = s_axis_tdata[M-1:0]; + assign s_axis_tready = en; + if(C == 1) assign icnl = 'x; + else begin + logic [C_BITS-1:0] Chnl = 0; + logic Last = 0; + uwire inc = ivld && en; + uwire clr = rst || (Last && inc); + always_ff @(posedge clk) begin + if(clr) begin + Chnl <= 0; + Last <= 0; + end + else if(inc) begin + Chnl <= Chnl + 1; + Last <= (~Chnl & (C-2)) == 0; + end + end + assign icnl = Chnl; + end + + // Core Thresholding Module + thresholding #(.N(N), .M(M), .C(C)) core ( + .clk, .rst, + .twe, .twa, .twd, + .en, + .ivld, .icnl, .idat, + .ovld, .ocnl(), .odat + ); + +endmodule : thresholding_axi diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v new file mode 100644 index 0000000000..bb6b17b32f --- /dev/null +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -0,0 +1,122 @@ +/****************************************************************************** + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief IPI-compatible Verilog wrapper for thresholding_axi module. + * @author Thomas B. 
Preußer + *****************************************************************************/ + +module thresholding_axi_wrapper #( + parameter N, // output precision + parameter M, // input/threshold precision + parameter C, // Channels + parameter C_BITS //= $clog2(C) +)( + //- Global Control ------------------ + input ap_clk, + input ap_rst_n, + + //- AXI Lite ------------------------ + // Writing + input s_axilite_AWVALID, + output s_axilite_AWREADY, + input [C_BITS+N-1:0] s_axilite_AWADDR, + + input s_axilite_WVALID, + output s_axilite_WREADY, + input [31:0] s_axilite_WDATA, + input [ 3:0] s_axilite_WSTRB, + + output s_axilite_BVALID, + input s_axilite_BREADY, + output [1:0] s_axilite_BRESP, + + // Reading + input s_axilite_ARVALID, + output s_axilite_ARREADY, + input [0:0] s_axilite_ARADDR, + + output s_axilite_RVALID, + input s_axilite_RREADY, + output [31:0] s_axilite_RDATA, + output [ 1:0] s_axilite_RRESP, + + //- AXI Stream - Input -------------- + output s_axis_tready, + input s_axis_tvalid, + input [((M+7)/8)*8-1:0] s_axis_tdata, + + //- AXI Stream - Output ------------- + input m_axis_tready, + output m_axis_tvalid, + output [((N+7)/8)*8-1:0] m_axis_tdata +); + + thresholding_axi #(.N(N), .M(M), .C(C)) inst ( + //- Global Control ------------------ + .ap_clk(ap_clk), + .ap_rst_n(ap_rst_n), + + //- AXI Lite ------------------------ + // Writing + .s_axilite_AWVALID(s_axilite_AWVALID), + .s_axilite_AWREADY(s_axilite_AWREADY), + .s_axilite_AWADDR(s_axilite_AWADDR), + + .s_axilite_WVALID(s_axilite_WVALID), + .s_axilite_WREADY(s_axilite_WREADY), + .s_axilite_WDATA(s_axilite_WDATA), + .s_axilite_WSTRB(s_axilite_WSTRB), + + .s_axilite_BVALID(s_axilite_BVALID), + .s_axilite_BREADY(s_axilite_BREADY), + .s_axilite_BRESP(s_axilite_BRESP), + + // Reading + .s_axilite_ARVALID(s_axilite_ARVALID), + .s_axilite_ARREADY(s_axilite_ARREADY), + .s_axilite_ARADDR(s_axilite_ARADDR), + + .s_axilite_RVALID(s_axilite_RVALID), + .s_axilite_RREADY(s_axilite_RREADY), + 
.s_axilite_RDATA(s_axilite_RDATA), + .s_axilite_RRESP(s_axilite_RRESP), + + //- AXI Stream - Input -------------- + .s_axis_tready(s_axis_tready), + .s_axis_tvalid(s_axis_tvalid), + .s_axis_tdata(s_axis_tdata), + + //- AXI Stream - Output ------------- + .m_axis_tready(m_axis_tready), + .m_axis_tvalid(m_axis_tvalid), + .m_axis_tdata(m_axis_tdata) + ); + +endmodule : thresholding_axi_wrapper From 3c92c2fc460fb5e45fdb0dfcc0b92c572ae65ce7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 20 Sep 2022 13:33:01 +0100 Subject: [PATCH 002/111] IP core support files for thresholding module. --- finn-rtllib/thresholding/component.xml | 817 ++++++++++++++++++ .../xgui/thresholding_axi_wrapper_v1_0.tcl | 74 ++ 2 files changed, 891 insertions(+) create mode 100644 finn-rtllib/thresholding/component.xml create mode 100644 finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl diff --git a/finn-rtllib/thresholding/component.xml b/finn-rtllib/thresholding/component.xml new file mode 100644 index 0000000000..0a56f93316 --- /dev/null +++ b/finn-rtllib/thresholding/component.xml @@ -0,0 +1,817 @@ + + + amd.com + user + thresholding_axi_wrapper + 1.0 + + + m_axis + + + + + + + TDATA + + + m_axis_tdata + + + + + TVALID + + + m_axis_tvalid + + + + + TREADY + + + m_axis_tready + + + + + + s_axis + + + + + + + TDATA + + + s_axis_tdata + + + + + TVALID + + + s_axis_tvalid + + + + + TREADY + + + s_axis_tready + + + + + + s_axilite + + + + + + + + + AWADDR + + + s_axilite_AWADDR + + + + + AWVALID + + + s_axilite_AWVALID + + + + + AWREADY + + + s_axilite_AWREADY + + + + + WDATA + + + s_axilite_WDATA + + + + + WSTRB + + + s_axilite_WSTRB + + + + + WVALID + + + s_axilite_WVALID + + + + + WREADY + + + s_axilite_WREADY + + + + + BRESP + + + s_axilite_BRESP + + + + + BVALID + + + s_axilite_BVALID + + + + + BREADY + + + s_axilite_BREADY + + + + + ARADDR + + + s_axilite_ARADDR + + + + + ARVALID + + + s_axilite_ARVALID + + + + + ARREADY + + + 
s_axilite_ARREADY + + + + + RDATA + + + s_axilite_RDATA + + + + + RRESP + + + s_axilite_RRESP + + + + + RVALID + + + s_axilite_RVALID + + + + + RREADY + + + s_axilite_RREADY + + + + + + ap_rst_n + + + + + + + RST + + + ap_rst_n + + + + + + POLARITY + ACTIVE_LOW + + + + + ap_clk + + + + + + + CLK + + + ap_clk + + + + + + ASSOCIATED_RESET + ap_rst_n + + + ASSOCIATED_BUSIF + m_axis:s_axis:s_axilite + + + + + + + s_axilite + s_axilite + + reg0 + reg0 + 0x0 + 4096 + 32 + register + + + + + + + xilinx_anylanguagesynthesis + Synthesis + :vivado.xilinx.com:synthesis + Verilog + thresholding_axi_wrapper + + xilinx_anylanguagesynthesis_view_fileset + + + + viewChecksum + 5cc8f7a9 + + + + + xilinx_xpgui + UI Layout + :vivado.xilinx.com:xgui.ui + + xilinx_xpgui_view_fileset + + + + viewChecksum + c456596c + + + + + + + ap_clk + + in + + + std_logic + xilinx_anylanguagesynthesis + + + + + + ap_rst_n + + in + + + std_logic + xilinx_anylanguagesynthesis + + + + + + s_axilite_AWVALID + + in + + + std_logic + xilinx_anylanguagesynthesis + + + + 0 + + + + + s_axilite_AWREADY + + out + + + std_logic + xilinx_anylanguagesynthesis + + + + + + s_axilite_AWADDR + + in + + 3 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + + + + 0 + + + + + s_axilite_WVALID + + in + + + std_logic + xilinx_anylanguagesynthesis + + + + 0 + + + + + s_axilite_WREADY + + out + + + std_logic + xilinx_anylanguagesynthesis + + + + + + s_axilite_WDATA + + in + + 31 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + + + + 0 + + + + + s_axilite_WSTRB + + in + + 3 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + + + + 1 + + + + + s_axilite_BVALID + + out + + + std_logic + xilinx_anylanguagesynthesis + + + + + + s_axilite_BREADY + + in + + + std_logic + xilinx_anylanguagesynthesis + + + + 0 + + + + + s_axilite_BRESP + + out + + 1 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + + + + + + s_axilite_ARVALID + + in + + + std_logic + xilinx_anylanguagesynthesis + + + + 0 + + + 
+ + s_axilite_ARREADY + + out + + + std_logic + xilinx_anylanguagesynthesis + + + + + + s_axilite_ARADDR + + in + + 0 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + + + + 0 + + + + + s_axilite_RVALID + + out + + + std_logic + xilinx_anylanguagesynthesis + + + + + + s_axilite_RREADY + + in + + + std_logic + xilinx_anylanguagesynthesis + + + + 0 + + + + + s_axilite_RDATA + + out + + 31 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + + + + + + s_axilite_RRESP + + out + + 1 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + + + + + + s_axis_tready + + out + + + std_logic + xilinx_anylanguagesynthesis + + + + + + s_axis_tvalid + + in + + + std_logic + xilinx_anylanguagesynthesis + + + + + + s_axis_tdata + + in + + 15 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + + + + 0 + + + + + m_axis_tready + + in + + + std_logic + xilinx_anylanguagesynthesis + + + + 1 + + + + + m_axis_tvalid + + out + + + std_logic + xilinx_anylanguagesynthesis + + + + + + m_axis_tdata + + out + + 7 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + + + + + + + + N + N + 4 + + + M + M + 16 + + + C + C + 1 + + + C_BITS + C Bits + 0 + + + + + + choice_list_74b5137e + ACTIVE_HIGH + ACTIVE_LOW + + + + + xilinx_anylanguagesynthesis_view_fileset + + hdl/thresholding.sv + systemVerilogSource + + + hdl/thresholding_axi.sv + systemVerilogSource + + + hdl/thresholding_axi_wrapper.v + verilogSource + CHECKSUM_2ec027ae + + + + xilinx_xpgui_view_fileset + + xgui/thresholding_axi_wrapper_v1_0.tcl + tclSource + CHECKSUM_c456596c + XGUI_VERSION_2 + + + + thresholding_axi_wrapper_v1_0 + + + N + N + 4 + + + M + M + 16 + + + C + C + 1 + + + C_BITS + C_BITS + 0 + + + Component_Name + thresholding_axi_wrapper_v1_0 + + + + + + virtex7 + qvirtex7 + versal + kintex7 + kintex7l + qkintex7 + qkintex7l + akintex7 + artix7 + artix7l + aartix7 + qartix7 + zynq + qzynq + azynq + spartan7 + aspartan7 + virtexu + zynquplus + virtexuplus + virtexuplusHBM + virtexuplus58g 
+ kintexuplus + artixuplus + kintexu + + + /UserIP + + thresholding_axi_wrapper_v1_0 + package_project + AMD + 2 + 2022-09-20T12:31:16Z + + + 2022.1 + + + + + + + + + diff --git a/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl b/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl new file mode 100644 index 0000000000..02c373e8f2 --- /dev/null +++ b/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl @@ -0,0 +1,74 @@ +# Definitional proc to organize widgets for parameters. +proc init_gui { IPINST } { + ipgui::add_param $IPINST -name "Component_Name" + #Adding Page + set Page_0 [ipgui::add_page $IPINST -name "Page 0"] + set C [ipgui::add_param $IPINST -name "C" -parent ${Page_0}] + set_property tooltip {Channel Count} ${C} + set C_BITS [ipgui::add_param $IPINST -name "C_BITS" -parent ${Page_0}] + set_property tooltip {Must be clog2(C)} ${C_BITS} + set M [ipgui::add_param $IPINST -name "M" -parent ${Page_0}] + set_property tooltip {Input Precision} ${M} + set N [ipgui::add_param $IPINST -name "N" -parent ${Page_0}] + set_property tooltip {Output Precision} ${N} + + +} + +proc update_PARAM_VALUE.C { PARAM_VALUE.C } { + # Procedure called to update C when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.C { PARAM_VALUE.C } { + # Procedure called to validate C + return true +} + +proc update_PARAM_VALUE.C_BITS { PARAM_VALUE.C_BITS } { + # Procedure called to update C_BITS when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.C_BITS { PARAM_VALUE.C_BITS } { + # Procedure called to validate C_BITS + return true +} + +proc update_PARAM_VALUE.M { PARAM_VALUE.M } { + # Procedure called to update M when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.M { PARAM_VALUE.M } { + # Procedure called to validate M + return true +} + +proc update_PARAM_VALUE.N { PARAM_VALUE.N } { + # Procedure called to update N when any of the 
dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.N { PARAM_VALUE.N } { + # Procedure called to validate N + return true +} + + +proc update_MODELPARAM_VALUE.N { MODELPARAM_VALUE.N PARAM_VALUE.N } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.N}] ${MODELPARAM_VALUE.N} +} + +proc update_MODELPARAM_VALUE.M { MODELPARAM_VALUE.M PARAM_VALUE.M } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.M}] ${MODELPARAM_VALUE.M} +} + +proc update_MODELPARAM_VALUE.C { MODELPARAM_VALUE.C PARAM_VALUE.C } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.C}] ${MODELPARAM_VALUE.C} +} + +proc update_MODELPARAM_VALUE.C_BITS { MODELPARAM_VALUE.C_BITS PARAM_VALUE.C_BITS } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.C_BITS}] ${MODELPARAM_VALUE.C_BITS} +} + From 09c6da9fc27c3897d3a9cb7423a3e21978f17c2c Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 21 Sep 2022 15:36:12 +0100 Subject: [PATCH 003/111] [thresholding] FINN-44: Add skeleton class for Threshold (the RTL version, no HLS support for this class required). 
The following functions have been removed when compared to the original Thresholding_Batch class: - get_weightstream_width_padded() needed for cppsim - get_ap_int_max_w() needed for cppsim - get_template_param_values() needed for cppsim - get_hls_compatible_threshold_tensor() needed for cppsim/hlslib - get_verilog_top_module_intf_names() already have TOP verilog module interface names I think - get_op_and_param_counts() not used anywhere - ipgen_extra_directives() needed for cppsim/hlslib Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 159 ++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100755 src/finn/custom_op/fpgadataflow/thresholding_binary_search.py diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py new file mode 100755 index 0000000000..0e1916706b --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -0,0 +1,159 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp + +"""@package thresholding_binary_search +- ONNX i/o tensor shape assumptions for Thresholding: +- input 0 is the input tensor, shape (..., NumChannels) +- input 1 is the threshold tensor, shape (NumChannels, n_thres) +- output 0 is the output tensor, shape (..., NumChannels) - same as input +- the '...' here can be any shape (representing groups of vectors) + +This module creates an RTL IP, HLS is not supported. See 'thresholding_batch' +for a HLS equivalent. 
+""" + + +class Thresholding_Bin_Search(HLSCustomOp): + """Class that corresponds to finn-rtllib 'thresholding' function.""" + + def __init__(self, onnx_node): + super().__init__(onnx_node) + + def get_nodeattr_types(self): + return {} + + def calc_tmem(self): + return 0 + + def make_shape_compatible_op(self, model): + return [] + + def infer_node_datatype(self, model): + return + + def verify_node(self): + return [] + + def bram_estimation(self): + return 0 + + def lut_estimation(self): + return 0 + + def get_input_datatype(self): + return None + + def get_output_datatype(self): + return None + + def get_weight_datatype(self): + return None + + def minimize_accumulator_width(self, model): + return None + + def get_instream_width(self): + return 0 + + def get_outstream_width(self): + return 0 + + def get_weightstream_width(self): + return 0 + + def get_folded_input_shape(self): + return tuple([] + []) + + def get_folded_output_shape(self): + return tuple([] + []) + + def get_normal_input_shape(self): + return tuple([] + []) + + def get_normal_output_shape(self): + return tuple([] + []) + + def get_number_output_values(self): + return 0 + + def get_exp_cycles(self): + return 0 + + def get_template_param_values(self): + return dict() + + def make_weight_file(self, weights, weight_file_mode, weight_file_name): + """Produce a file containing given weights (thresholds) in appropriate + format for this layer. This file can be used for either synthesis or + run-time reconfig of weights. 
+ + Arguments: + * weights : numpy array with weights to be put into the file + * weight_file_mode : one of {hls_header, decoupled_verilog_dat, + decoupled_runtime} + * weight_file_name : filename for the weight file to be generated + """ + return + + def generate_params(self, model, path): + return + + def execute_node(self, context, graph): + return + + def code_generation_ipi(self): + return [] + + def global_includes(self): + pass + + def defines(self, var): + pass + + def read_npy_data(self): + pass + + def strm_decl(self): + pass + + def docompute(self): + pass + + def dataoutstrm(self): + pass + + def save_as_npy(self): + pass + + def blackboxfunction(self): + pass + + def pragmas(self): + pass From 1dde2479f65de6cd8bce0be7091189c5b2d313c1 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 21 Sep 2022 15:52:58 +0100 Subject: [PATCH 004/111] [thresholding] FINN-44: Update custom_op's __init__ to pick up new Threshold_binary_search class Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index e5eb483a00..65fbd6e20c 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -58,6 +58,9 @@ from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch +from finn.custom_op.fpgadataflow.thresholding_binary_search import ( + Thresholding_Bin_Search, +) from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour_Batch from finn.custom_op.fpgadataflow.vectorvectoractivation import VectorVectorActivation @@ -79,6 +82,7 @@ custom_op["Pool_Batch"] = Pool_Batch custom_op["FMPadding_Batch"] = 
FMPadding_Batch custom_op["Thresholding_Batch"] = Thresholding_Batch +custom_op["Thresholding_Binary_search"] = Thresholding_Bin_Search custom_op["AddStreams_Batch"] = AddStreams_Batch custom_op["LabelSelect_Batch"] = LabelSelect_Batch custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch From 95082d3ce1f518494910b5444da05722fa8db09c Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 21 Sep 2022 19:01:19 +0100 Subject: [PATCH 005/111] [thresholding] FINN-44: Add inital node attributes for Thresholding binary search class Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 0e1916706b..97d8e0b281 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -47,7 +47,45 @@ def __init__(self, onnx_node): super().__init__(onnx_node) def get_nodeattr_types(self): - return {} + my_attrs = { + # parallelization; channels thresholded per cycle + "PE": ("i", True, 0), + # number of channels (each may have different thresholds) + "NumChannels": ("i", True, 0), + # number of steps in thresholding function. 
Used only in decoupled mode + "numSteps": ("i", True, 1), + # string defining memory type + "ram_style": ("s", False, "distributed", {"distributed", "block"}), + # FINN DataTypes for inputs, outputs + "inputDataType": ("s", True, ""), + "weightDataType": ("s", True, ""), + "outputDataType": ("s", True, ""), + # input and output FIFO depths + "inFIFODepth": ("i", False, 0), + "outFIFODepth": ("i", False, 0), + # number of input vectors, examples: + # [1] is a single vector (like a FC layer with batch=1) + # [4] is four vectors (like a FC layer with batch=4) + # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) + "numInputVectors": ("ints", False, [1]), + # memory mode for the thresholds + # const -- embedded thresholds, default + # decoupled -- streaming thresholds with streamer packaged inside IP + "mem_mode": ("s", False, "const", {"const", "decoupled"}), + # (mem_mode = decoupled only) whether weights (thresholds) will be + # writable through an AXI-lite interface during runtime + # 1 for enabled, 0 for disabled. + # see finn-rtllib/memstream/doc/README for more about the memory + # address map used for writable weights + # IMPORTANT: After using AXI lite to either read or write the weights, + # always "flush" the accelerator by first passing a dummy input + # vector through the accelerator. This will get rid of any old + # weight data from the weight FIFOs. 
+ "runtime_writeable_weights": ("i", False, 0, {0, 1}), + "gen_top_module": ("s", False, ""), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs def calc_tmem(self): return 0 From 72832be6caeefdb895a911988ba5ee77d7d2813f Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 21 Sep 2022 19:02:30 +0100 Subject: [PATCH 006/111] [thresholding] FINN-44: Add calc_tmem() method Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 97d8e0b281..6195a26afb 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -88,7 +88,9 @@ def get_nodeattr_types(self): return my_attrs def calc_tmem(self): - return 0 + num_channels = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + return num_channels // pe def make_shape_compatible_op(self, model): return [] From 0d4e3bea27fce23864729663411a80c6734ed402 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 21 Sep 2022 19:06:07 +0100 Subject: [PATCH 007/111] [thresholding] FINN-44: Add methods for retrieving inut/output/weight data types Signed-off-by: Fionn O'Donohoe --- .../custom_op/fpgadataflow/thresholding_binary_search.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 6195a26afb..50a3ce5b6b 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+from qonnx.core.datatype import DataType + from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp """@package thresholding_binary_search @@ -108,13 +110,14 @@ def lut_estimation(self): return 0 def get_input_datatype(self): - return None + return DataType[self.get_nodeattr("inputDataType")] def get_output_datatype(self): - return None + return DataType[self.get_nodeattr("outputDataType")] def get_weight_datatype(self): - return None + """The term 'weights' and 'thresholds' are used interchangably in this class.""" + return DataType[self.get_nodeattr("weightDataType")] def minimize_accumulator_width(self, model): return None From 28568c6777d64adaa9d16f9bc58c3eda96fd7dbc Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 21 Sep 2022 19:09:24 +0100 Subject: [PATCH 008/111] [thresholding] FINN-44: Add methods for retrieving node input/output shapes Signed-off-by: Fionn O'Donohoe --- .../fpgadataflow/thresholding_binary_search.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 50a3ce5b6b..ee74f28485 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -132,16 +132,25 @@ def get_weightstream_width(self): return 0 def get_folded_input_shape(self): - return tuple([] + []) + fold = self.calc_tmem() + pe = self.get_nodeattr("PE") + vecs = list(self.get_nodeattr("numInputVectors")) + folded_input_shape = tuple(vecs + [fold, pe]) + return folded_input_shape def get_folded_output_shape(self): - return tuple([] + []) + # same shape as input + return self.get_folded_input_shape() def get_normal_input_shape(self): - return tuple([] + []) + num_channels = self.get_nodeattr("NumChannels") + vecs = list(self.get_nodeattr("numInputVectors")) + normal_input_shape = tuple(vecs + [num_channels]) + return 
normal_input_shape def get_normal_output_shape(self): - return tuple([] + []) + # same shape as input + return self.get_normal_input_shape() def get_number_output_values(self): return 0 From 280870d25864781b2ce3683a10824049d19f9bff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Mon, 24 Oct 2022 14:58:32 +0100 Subject: [PATCH 009/111] Thresholding over signed inputs. --- finn-rtllib/thresholding/hdl/thresholding.sv | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 93ccdc51c5..9deeac458c 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -64,8 +64,8 @@ module thresholding #( // Input Stream input logic ivld, - input logic [C_BITS-1:0] icnl, // Ignored for C == 1 - input logic [M -1:0] idat, + input logic [C_BITS-1:0] icnl, // Ignored for C == 1 + input logic signed [M -1:0] idat, // Output Stream output logic ovld, @@ -75,10 +75,10 @@ module thresholding #( // Pipeline Links & Feed typedef struct packed { - logic vld; // Valid data identification - logic [C_BITS-1:0] cnl; // Channel - logic [M -1:0] val; // Original input value - logic [0:N-1] res; // Assembling result with valid prefix [0:stage] after stage #stage + logic vld; // Valid data identification + logic [C_BITS-1:0] cnl; // Channel + logic signed [M -1:0] val; // Original input value + logic [0:N-1] res; // Assembling result with valid prefix [0:stage] after stage #stage } pipe_t; uwire pipe_t pipe[0:N]; assign pipe[0] = pipe_t'{ vld: ivld, cnl: icnl, val: idat, res: {N{1'bx}} }; // Feed original input @@ -88,13 +88,13 @@ module thresholding #( for(genvar stage = 0; stage < N; stage++) begin : genStages // Threshold Memory - uwire [M-1:0] thresh; + uwire signed [M-1:0] thresh; if(1) begin : blkUpdate // Write control: local select from global address uwire we = twe && tws[stage]; 
if((C == 1) && (stage == 0)) begin - logic [M-1:0] Thresh = 'x; + logic signed [M-1:0] Thresh = 'x; always_ff @(posedge clk) begin if(rst) Thresh <= 'x; else if(we) Thresh <= twd; @@ -102,7 +102,7 @@ module thresholding #( assign thresh = Thresh; end else begin - logic [M-1:0] Threshs[C * 2**stage]; + logic signed [M-1:0] Threshs[C * 2**stage]; uwire [$clog2(C)+stage-1:0] wa = twa[$left(twa):N-stage]; uwire [$clog2(C)+stage-1:0] ra; if(C > 1) assign ra[stage+:C_BITS] = pipe[stage].cnl; @@ -114,7 +114,7 @@ module thresholding #( end // Read - logic [M-1:0] RdReg; + logic signed [M-1:0] RdReg; always_ff @(posedge clk) begin if(en) RdReg <= Threshs[ra]; end From 2bf1a21e463297a885b1a7a40ab78fb2deeb2d52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Mon, 24 Oct 2022 15:38:22 +0100 Subject: [PATCH 010/111] Introduce an optional threshold output bias. --- finn-rtllib/thresholding/hdl/thresholding.sv | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 9deeac458c..cea93e40ab 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -48,7 +48,12 @@ module thresholding #( int unsigned M, // input/threshold precision int unsigned C, // number of channels - localparam int unsigned C_BITS = C < 2? 1 : $clog2(C) + int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) + + localparam int unsigned C_BITS = C < 2? 1 : $clog2(C), + localparam int unsigned O_BITS = BIAS <= 0? + /* unsigned */ $clog2(2**N-BIAS) : + /* signed */ 1+$clog2(BIAS >= 2**(N-1)? 
BIAS : 2**N-BIAS) )( // Global Control input logic clk, @@ -70,7 +75,7 @@ module thresholding #( // Output Stream output logic ovld, output logic [C_BITS-1:0] ocnl, - output logic [N -1:0] odat + output logic [O_BITS-1:0] odat ); // Pipeline Links & Feed @@ -148,6 +153,6 @@ module thresholding #( // Output assign ovld = pipe[N].vld; assign ocnl = pipe[N].cnl; - assign odat = pipe[N].res; + assign odat = pipe[N].res - BIAS; endmodule : thresholding From 4c7b5acd24cf88716fdfdc1dac8d8cc2c2ece44e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 25 Oct 2022 06:17:14 +0100 Subject: [PATCH 011/111] Exposing the thresholding bias through the AXI adapter. --- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 71e54c5ca0..a20952c33b 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -36,6 +36,12 @@ module thresholding_axi #( int unsigned N, // output precision int unsigned M, // input/threshold precision int unsigned C // Channels + + int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) + + localparam int unsigned O_BITS = BIAS <= 0? + /* unsigned */ $clog2(2**N-BIAS) : + /* signed */ 1+$clog2(BIAS >= 2**(N-1)? 
BIAS : 2**N-BIAS) )( //- Global Control ------------------ input logic ap_clk, @@ -74,7 +80,7 @@ module thresholding_axi #( //- AXI Stream - Output ------------- input logic m_axis_tready, output logic m_axis_tvalid, - output logic [((N+7)/8)*8-1:0] m_axis_tdata + output logic [((O_BITS+7)/8)*8-1:0] m_axis_tdata ); //- Global Control ------------------------------------------------------ uwire clk = ap_clk; @@ -134,12 +140,12 @@ module thresholding_axi #( //- IO-Sandwich with two-stage output buffer for containing a local enable uwire en; - uwire [N-1:0] odat; + uwire [O_BITS-1:0] odat; uwire ovld; if(1) begin : blkOutputDecouple typedef struct { logic vld; - logic [N-1:0] dat; + logic [O_BITS-1:0] dat; } buf_t; buf_t Buf[2] = '{ default: '{ vld: 0, dat: 'x } }; always_ff @(posedge clk) begin @@ -187,7 +193,7 @@ module thresholding_axi #( end // Core Thresholding Module - thresholding #(.N(N), .M(M), .C(C)) core ( + thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS)) core ( .clk, .rst, .twe, .twa, .twd, .en, From 7663d3f60c445ad595a193eb6b493b4f65b2f921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 25 Oct 2022 11:55:19 +0100 Subject: [PATCH 012/111] Have thresholding wrapper pass on bias parameter and compute derived ones. 
--- .../thresholding/hdl/thresholding_axi_wrapper.v | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index bb6b17b32f..b5c65e5879 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -36,7 +36,12 @@ module thresholding_axi_wrapper #( parameter N, // output precision parameter M, // input/threshold precision parameter C, // Channels - parameter C_BITS //= $clog2(C) + parameter BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) + + localparam C_BITS = $clog2(C), + localparam O_BITS = BIAS <= 0? + /* unsigned */ $clog2(2**N-BIAS) : + /* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS) )( //- Global Control ------------------ input ap_clk, @@ -75,10 +80,10 @@ module thresholding_axi_wrapper #( //- AXI Stream - Output ------------- input m_axis_tready, output m_axis_tvalid, - output [((N+7)/8)*8-1:0] m_axis_tdata + output [((O_BITS+7)/8)*8-1:0] m_axis_tdata ); - thresholding_axi #(.N(N), .M(M), .C(C)) inst ( + thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS)) inst ( //- Global Control ------------------ .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), From 55e2eacd4b554456bb980f7518f9c79d7be3104d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 25 Oct 2022 15:53:11 +0100 Subject: [PATCH 013/111] Fix typo. 
--- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index a20952c33b..6b869ba303 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -35,7 +35,7 @@ module thresholding_axi #( int unsigned N, // output precision int unsigned M, // input/threshold precision - int unsigned C // Channels + int unsigned C, // Channels int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) From fa5d71aaf2b4ba3340aa8e07e23d90bf45bee32d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 25 Oct 2022 16:58:08 +0100 Subject: [PATCH 014/111] Abandon IPI support files. --- finn-rtllib/thresholding/component.xml | 817 ------------------ .../xgui/thresholding_axi_wrapper_v1_0.tcl | 74 -- 2 files changed, 891 deletions(-) delete mode 100644 finn-rtllib/thresholding/component.xml delete mode 100644 finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl diff --git a/finn-rtllib/thresholding/component.xml b/finn-rtllib/thresholding/component.xml deleted file mode 100644 index 0a56f93316..0000000000 --- a/finn-rtllib/thresholding/component.xml +++ /dev/null @@ -1,817 +0,0 @@ - - - amd.com - user - thresholding_axi_wrapper - 1.0 - - - m_axis - - - - - - - TDATA - - - m_axis_tdata - - - - - TVALID - - - m_axis_tvalid - - - - - TREADY - - - m_axis_tready - - - - - - s_axis - - - - - - - TDATA - - - s_axis_tdata - - - - - TVALID - - - s_axis_tvalid - - - - - TREADY - - - s_axis_tready - - - - - - s_axilite - - - - - - - - - AWADDR - - - s_axilite_AWADDR - - - - - AWVALID - - - s_axilite_AWVALID - - - - - AWREADY - - - s_axilite_AWREADY - - - - - WDATA - - - s_axilite_WDATA - - - - - WSTRB - - - s_axilite_WSTRB - - - - - WVALID - - - s_axilite_WVALID - - - - - WREADY - - - s_axilite_WREADY - - - - - BRESP - - - 
s_axilite_BRESP - - - - - BVALID - - - s_axilite_BVALID - - - - - BREADY - - - s_axilite_BREADY - - - - - ARADDR - - - s_axilite_ARADDR - - - - - ARVALID - - - s_axilite_ARVALID - - - - - ARREADY - - - s_axilite_ARREADY - - - - - RDATA - - - s_axilite_RDATA - - - - - RRESP - - - s_axilite_RRESP - - - - - RVALID - - - s_axilite_RVALID - - - - - RREADY - - - s_axilite_RREADY - - - - - - ap_rst_n - - - - - - - RST - - - ap_rst_n - - - - - - POLARITY - ACTIVE_LOW - - - - - ap_clk - - - - - - - CLK - - - ap_clk - - - - - - ASSOCIATED_RESET - ap_rst_n - - - ASSOCIATED_BUSIF - m_axis:s_axis:s_axilite - - - - - - - s_axilite - s_axilite - - reg0 - reg0 - 0x0 - 4096 - 32 - register - - - - - - - xilinx_anylanguagesynthesis - Synthesis - :vivado.xilinx.com:synthesis - Verilog - thresholding_axi_wrapper - - xilinx_anylanguagesynthesis_view_fileset - - - - viewChecksum - 5cc8f7a9 - - - - - xilinx_xpgui - UI Layout - :vivado.xilinx.com:xgui.ui - - xilinx_xpgui_view_fileset - - - - viewChecksum - c456596c - - - - - - - ap_clk - - in - - - std_logic - xilinx_anylanguagesynthesis - - - - - - ap_rst_n - - in - - - std_logic - xilinx_anylanguagesynthesis - - - - - - s_axilite_AWVALID - - in - - - std_logic - xilinx_anylanguagesynthesis - - - - 0 - - - - - s_axilite_AWREADY - - out - - - std_logic - xilinx_anylanguagesynthesis - - - - - - s_axilite_AWADDR - - in - - 3 - 0 - - - - std_logic_vector - xilinx_anylanguagesynthesis - - - - 0 - - - - - s_axilite_WVALID - - in - - - std_logic - xilinx_anylanguagesynthesis - - - - 0 - - - - - s_axilite_WREADY - - out - - - std_logic - xilinx_anylanguagesynthesis - - - - - - s_axilite_WDATA - - in - - 31 - 0 - - - - std_logic_vector - xilinx_anylanguagesynthesis - - - - 0 - - - - - s_axilite_WSTRB - - in - - 3 - 0 - - - - std_logic_vector - xilinx_anylanguagesynthesis - - - - 1 - - - - - s_axilite_BVALID - - out - - - std_logic - xilinx_anylanguagesynthesis - - - - - - s_axilite_BREADY - - in - - - std_logic - xilinx_anylanguagesynthesis - - - 
- 0 - - - - - s_axilite_BRESP - - out - - 1 - 0 - - - - std_logic_vector - xilinx_anylanguagesynthesis - - - - - - s_axilite_ARVALID - - in - - - std_logic - xilinx_anylanguagesynthesis - - - - 0 - - - - - s_axilite_ARREADY - - out - - - std_logic - xilinx_anylanguagesynthesis - - - - - - s_axilite_ARADDR - - in - - 0 - 0 - - - - std_logic_vector - xilinx_anylanguagesynthesis - - - - 0 - - - - - s_axilite_RVALID - - out - - - std_logic - xilinx_anylanguagesynthesis - - - - - - s_axilite_RREADY - - in - - - std_logic - xilinx_anylanguagesynthesis - - - - 0 - - - - - s_axilite_RDATA - - out - - 31 - 0 - - - - std_logic_vector - xilinx_anylanguagesynthesis - - - - - - s_axilite_RRESP - - out - - 1 - 0 - - - - std_logic_vector - xilinx_anylanguagesynthesis - - - - - - s_axis_tready - - out - - - std_logic - xilinx_anylanguagesynthesis - - - - - - s_axis_tvalid - - in - - - std_logic - xilinx_anylanguagesynthesis - - - - - - s_axis_tdata - - in - - 15 - 0 - - - - std_logic_vector - xilinx_anylanguagesynthesis - - - - 0 - - - - - m_axis_tready - - in - - - std_logic - xilinx_anylanguagesynthesis - - - - 1 - - - - - m_axis_tvalid - - out - - - std_logic - xilinx_anylanguagesynthesis - - - - - - m_axis_tdata - - out - - 7 - 0 - - - - std_logic_vector - xilinx_anylanguagesynthesis - - - - - - - - N - N - 4 - - - M - M - 16 - - - C - C - 1 - - - C_BITS - C Bits - 0 - - - - - - choice_list_74b5137e - ACTIVE_HIGH - ACTIVE_LOW - - - - - xilinx_anylanguagesynthesis_view_fileset - - hdl/thresholding.sv - systemVerilogSource - - - hdl/thresholding_axi.sv - systemVerilogSource - - - hdl/thresholding_axi_wrapper.v - verilogSource - CHECKSUM_2ec027ae - - - - xilinx_xpgui_view_fileset - - xgui/thresholding_axi_wrapper_v1_0.tcl - tclSource - CHECKSUM_c456596c - XGUI_VERSION_2 - - - - thresholding_axi_wrapper_v1_0 - - - N - N - 4 - - - M - M - 16 - - - C - C - 1 - - - C_BITS - C_BITS - 0 - - - Component_Name - thresholding_axi_wrapper_v1_0 - - - - - - virtex7 - qvirtex7 - versal - 
kintex7 - kintex7l - qkintex7 - qkintex7l - akintex7 - artix7 - artix7l - aartix7 - qartix7 - zynq - qzynq - azynq - spartan7 - aspartan7 - virtexu - zynquplus - virtexuplus - virtexuplusHBM - virtexuplus58g - kintexuplus - artixuplus - kintexu - - - /UserIP - - thresholding_axi_wrapper_v1_0 - package_project - AMD - 2 - 2022-09-20T12:31:16Z - - - 2022.1 - - - - - - - - - diff --git a/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl b/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl deleted file mode 100644 index 02c373e8f2..0000000000 --- a/finn-rtllib/thresholding/xgui/thresholding_axi_wrapper_v1_0.tcl +++ /dev/null @@ -1,74 +0,0 @@ -# Definitional proc to organize widgets for parameters. -proc init_gui { IPINST } { - ipgui::add_param $IPINST -name "Component_Name" - #Adding Page - set Page_0 [ipgui::add_page $IPINST -name "Page 0"] - set C [ipgui::add_param $IPINST -name "C" -parent ${Page_0}] - set_property tooltip {Channel Count} ${C} - set C_BITS [ipgui::add_param $IPINST -name "C_BITS" -parent ${Page_0}] - set_property tooltip {Must be clog2(C)} ${C_BITS} - set M [ipgui::add_param $IPINST -name "M" -parent ${Page_0}] - set_property tooltip {Input Precision} ${M} - set N [ipgui::add_param $IPINST -name "N" -parent ${Page_0}] - set_property tooltip {Output Precision} ${N} - - -} - -proc update_PARAM_VALUE.C { PARAM_VALUE.C } { - # Procedure called to update C when any of the dependent parameters in the arguments change -} - -proc validate_PARAM_VALUE.C { PARAM_VALUE.C } { - # Procedure called to validate C - return true -} - -proc update_PARAM_VALUE.C_BITS { PARAM_VALUE.C_BITS } { - # Procedure called to update C_BITS when any of the dependent parameters in the arguments change -} - -proc validate_PARAM_VALUE.C_BITS { PARAM_VALUE.C_BITS } { - # Procedure called to validate C_BITS - return true -} - -proc update_PARAM_VALUE.M { PARAM_VALUE.M } { - # Procedure called to update M when any of the dependent parameters in the arguments 
change -} - -proc validate_PARAM_VALUE.M { PARAM_VALUE.M } { - # Procedure called to validate M - return true -} - -proc update_PARAM_VALUE.N { PARAM_VALUE.N } { - # Procedure called to update N when any of the dependent parameters in the arguments change -} - -proc validate_PARAM_VALUE.N { PARAM_VALUE.N } { - # Procedure called to validate N - return true -} - - -proc update_MODELPARAM_VALUE.N { MODELPARAM_VALUE.N PARAM_VALUE.N } { - # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value - set_property value [get_property value ${PARAM_VALUE.N}] ${MODELPARAM_VALUE.N} -} - -proc update_MODELPARAM_VALUE.M { MODELPARAM_VALUE.M PARAM_VALUE.M } { - # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value - set_property value [get_property value ${PARAM_VALUE.M}] ${MODELPARAM_VALUE.M} -} - -proc update_MODELPARAM_VALUE.C { MODELPARAM_VALUE.C PARAM_VALUE.C } { - # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value - set_property value [get_property value ${PARAM_VALUE.C}] ${MODELPARAM_VALUE.C} -} - -proc update_MODELPARAM_VALUE.C_BITS { MODELPARAM_VALUE.C_BITS PARAM_VALUE.C_BITS } { - # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value - set_property value [get_property value ${PARAM_VALUE.C_BITS}] ${MODELPARAM_VALUE.C_BITS} -} - From 174c0ffe1d0614dd14013de1b073469d79c9191e Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 15 Nov 2022 17:59:23 +0000 Subject: [PATCH 015/111] [thresholding] allow for positive and negative bias values Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding.sv | 4 ++-- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 2 +- finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 
cea93e40ab..a99c752e17 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -51,7 +51,7 @@ module thresholding #( int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) localparam int unsigned C_BITS = C < 2? 1 : $clog2(C), - localparam int unsigned O_BITS = BIAS <= 0? + localparam int unsigned O_BITS = BIAS > 0? /* unsigned */ $clog2(2**N-BIAS) : /* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS) )( @@ -153,6 +153,6 @@ module thresholding #( // Output assign ovld = pipe[N].vld; assign ocnl = pipe[N].cnl; - assign odat = pipe[N].res - BIAS; + assign odat = pipe[N].res + BIAS; endmodule : thresholding diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 6b869ba303..795683da1d 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -39,7 +39,7 @@ module thresholding_axi #( int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) - localparam int unsigned O_BITS = BIAS <= 0? + localparam int unsigned O_BITS = BIAS > 0? /* unsigned */ $clog2(2**N-BIAS) : /* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS) )( diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index b5c65e5879..6bfc2f57a4 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -39,7 +39,7 @@ module thresholding_axi_wrapper #( parameter BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) localparam C_BITS = $clog2(C), - localparam O_BITS = BIAS <= 0? + localparam O_BITS = BIAS > 0? /* unsigned */ $clog2(2**N-BIAS) : /* signed */ 1+$clog2(BIAS >= 2**(N-1)? 
BIAS : 2**N-BIAS) )( From 2ec20e5cab8c821d7dc6d652564e85eb1bc84b6b Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 15 Nov 2022 18:00:52 +0000 Subject: [PATCH 016/111] [thresholding] pass bias from top module to thresholding.sv core Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding.sv | 2 +- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 2 +- finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index a99c752e17..f9763af96c 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -48,7 +48,7 @@ module thresholding #( int unsigned M, // input/threshold precision int unsigned C, // number of channels - int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) + int BIAS, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) localparam int unsigned C_BITS = C < 2? 1 : $clog2(C), localparam int unsigned O_BITS = BIAS > 0? diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 795683da1d..e4f3feac3f 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -37,7 +37,7 @@ module thresholding_axi #( int unsigned M, // input/threshold precision int unsigned C, // Channels - int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) + int BIAS, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) localparam int unsigned O_BITS = BIAS > 0? 
/* unsigned */ $clog2(2**N-BIAS) : diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index 6bfc2f57a4..1b5921d8ba 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -36,7 +36,7 @@ module thresholding_axi_wrapper #( parameter N, // output precision parameter M, // input/threshold precision parameter C, // Channels - parameter BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) + int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) localparam C_BITS = $clog2(C), localparam O_BITS = BIAS > 0? From 861614837dd187dc58ab24af0b5d0cd2050c76e6 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 15 Nov 2022 18:07:56 +0000 Subject: [PATCH 017/111] [thresholding] pass O_BITS from top module to thresholding.sv core Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding.sv | 4 +--- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 6 ++---- finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 4 ++-- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index f9763af96c..04116e995c 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -51,9 +51,7 @@ module thresholding #( int BIAS, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) localparam int unsigned C_BITS = C < 2? 1 : $clog2(C), - localparam int unsigned O_BITS = BIAS > 0? - /* unsigned */ $clog2(2**N-BIAS) : - /* signed */ 1+$clog2(BIAS >= 2**(N-1)? 
BIAS : 2**N-BIAS) + int unsigned O_BITS )( // Global Control input logic clk, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index e4f3feac3f..a7eec445e0 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -39,9 +39,7 @@ module thresholding_axi #( int BIAS, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) - localparam int unsigned O_BITS = BIAS > 0? - /* unsigned */ $clog2(2**N-BIAS) : - /* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS) + int unsigned O_BITS )( //- Global Control ------------------ input logic ap_clk, @@ -193,7 +191,7 @@ module thresholding_axi #( end // Core Thresholding Module - thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS)) core ( + thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) core ( .clk, .rst, .twe, .twa, .twd, .en, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index 1b5921d8ba..5c43a70445 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -39,7 +39,7 @@ module thresholding_axi_wrapper #( int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) localparam C_BITS = $clog2(C), - localparam O_BITS = BIAS > 0? + parameter O_BITS = BIAS > 0? /* unsigned */ $clog2(2**N-BIAS) : /* signed */ 1+$clog2(BIAS >= 2**(N-1)? 
BIAS : 2**N-BIAS) )( @@ -83,7 +83,7 @@ module thresholding_axi_wrapper #( output [((O_BITS+7)/8)*8-1:0] m_axis_tdata ); - thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS)) inst ( + thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst ( //- Global Control ------------------ .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), From 275abaddee9504360c1589565036611bab5955da Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 15 Nov 2022 18:10:12 +0000 Subject: [PATCH 018/111] [thresholding] pass C_BITS from top module to thresholding.sv core Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding.sv | 2 +- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 2 +- finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 04116e995c..70f94f1c22 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -50,7 +50,7 @@ module thresholding #( int BIAS, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) - localparam int unsigned C_BITS = C < 2? 
1 : $clog2(C), + int unsigned C_BITS, int unsigned O_BITS )( // Global Control diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index a7eec445e0..fac69b33fc 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -191,7 +191,7 @@ module thresholding_axi #( end // Core Thresholding Module - thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) core ( + thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core ( .clk, .rst, .twe, .twa, .twd, .en, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index 5c43a70445..588f9e4852 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -38,7 +38,7 @@ module thresholding_axi_wrapper #( parameter C, // Channels int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) - localparam C_BITS = $clog2(C), + parameter C_BITS = C < 2 ? 1 : $clog2(C), parameter O_BITS = BIAS > 0? /* unsigned */ $clog2(2**N-BIAS) : /* signed */ 1+$clog2(BIAS >= 2**(N-1)? 
BIAS : 2**N-BIAS) From 8849c026b780c152dd51c0e007c5f72bdca4808c Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 09:31:20 +0000 Subject: [PATCH 019/111] [thresholding] create & fill in RTL template values using FINN Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding.sv | 16 +-- .../thresholding/hdl/thresholding_axi.sv | 6 +- .../hdl/thresholding_axi_wrapper.v | 14 +-- .../thresholding_binary_search.py | 99 +++++++++++++++++++ 4 files changed, 117 insertions(+), 18 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 70f94f1c22..25d6ff3112 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -43,7 +43,7 @@ * threshold configuration relies on a channel address prefix. Inputs are * accompanied by a channel selector. *****************************************************************************/ -module thresholding #( +module $MODULE_NAME$ #( int unsigned N, // output precision int unsigned M, // input/threshold precision int unsigned C, // number of channels @@ -68,7 +68,7 @@ module thresholding #( // Input Stream input logic ivld, input logic [C_BITS-1:0] icnl, // Ignored for C == 1 - input logic signed [M -1:0] idat, + input logic $SIGN$ [M -1:0] idat, // Output Stream output logic ovld, @@ -80,7 +80,7 @@ module thresholding #( typedef struct packed { logic vld; // Valid data identification logic [C_BITS-1:0] cnl; // Channel - logic signed [M -1:0] val; // Original input value + logic $SIGN$ [M -1:0] val; // Original input value logic [0:N-1] res; // Assembling result with valid prefix [0:stage] after stage #stage } pipe_t; uwire pipe_t pipe[0:N]; @@ -91,13 +91,13 @@ module thresholding #( for(genvar stage = 0; stage < N; stage++) begin : genStages // Threshold Memory - uwire signed [M-1:0] thresh; + uwire $SIGN$ [M-1:0] thresh; if(1) begin : blkUpdate // Write control: local select from 
global address uwire we = twe && tws[stage]; if((C == 1) && (stage == 0)) begin - logic signed [M-1:0] Thresh = 'x; + logic $SIGN$ [M-1:0] Thresh = 'x; always_ff @(posedge clk) begin if(rst) Thresh <= 'x; else if(we) Thresh <= twd; @@ -105,7 +105,7 @@ module thresholding #( assign thresh = Thresh; end else begin - logic signed [M-1:0] Threshs[C * 2**stage]; + logic $SIGN$ [M-1:0] Threshs[C * 2**stage]; uwire [$clog2(C)+stage-1:0] wa = twa[$left(twa):N-stage]; uwire [$clog2(C)+stage-1:0] ra; if(C > 1) assign ra[stage+:C_BITS] = pipe[stage].cnl; @@ -117,7 +117,7 @@ module thresholding #( end // Read - logic signed [M-1:0] RdReg; + logic $SIGN$ [M-1:0] RdReg; always_ff @(posedge clk) begin if(en) RdReg <= Threshs[ra]; end @@ -153,4 +153,4 @@ module thresholding #( assign ocnl = pipe[N].cnl; assign odat = pipe[N].res + BIAS; -endmodule : thresholding +endmodule : $MODULE_NAME$ diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index fac69b33fc..97cdfd3e12 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -32,7 +32,7 @@ * @author Thomas B. 
Preußer *****************************************************************************/ -module thresholding_axi #( +module $MODULE_NAME_AXI$ #( int unsigned N, // output precision int unsigned M, // input/threshold precision int unsigned C, // Channels @@ -191,7 +191,7 @@ module thresholding_axi #( end // Core Thresholding Module - thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core ( + $MODULE_NAME$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core ( .clk, .rst, .twe, .twa, .twd, .en, @@ -199,4 +199,4 @@ module thresholding_axi #( .ovld, .ocnl(), .odat ); -endmodule : thresholding_axi +endmodule : $MODULE_NAME_AXI$ diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index 588f9e4852..e3f8596bc8 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -32,11 +32,11 @@ * @author Thomas B. Preußer *****************************************************************************/ -module thresholding_axi_wrapper #( - parameter N, // output precision - parameter M, // input/threshold precision - parameter C, // Channels - int BIAS = 0, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) +module $MODULE_NAME_AXI_WRAPPER$ #( + parameter N = $N$, // output precision + parameter M = $M$, // input/threshold precision + parameter C = $C$, // Channels + int BIAS = $BIAS$, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) parameter C_BITS = C < 2 ? 1 : $clog2(C), parameter O_BITS = BIAS > 0? 
@@ -83,7 +83,7 @@ module thresholding_axi_wrapper #( output [((O_BITS+7)/8)*8-1:0] m_axis_tdata ); - thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst ( + $MODULE_NAME_AXI$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst ( //- Global Control ------------------ .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), @@ -124,4 +124,4 @@ module thresholding_axi_wrapper #( .m_axis_tdata(m_axis_tdata) ); -endmodule : thresholding_axi_wrapper +endmodule : $MODULE_NAME_AXI_WRAPPER$ diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index ee74f28485..d546d52843 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -26,6 +26,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os from qonnx.core.datatype import DataType from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp @@ -85,6 +86,7 @@ def get_nodeattr_types(self): # weight data from the weight FIFOs. 
"runtime_writeable_weights": ("i", False, 0, {0, 1}), "gen_top_module": ("s", False, ""), + "activation_bias": ("i", False, 0), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -174,6 +176,103 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): """ return + # Get the integer from the DataType and string-ify it + # This assumes that the data is in the form "INTx" or similar + def conv_datatype_to_str(self, data_type): + # Handle the case that an int is passed to the function + if isinstance(data_type, int): + return str(data_type) + return str(DataType[data_type].bitwidth()) + + def prepare_codegen_rtl_values(self): + """All dictionary values produced in this function are to replace + their key value(s) in the RTL template files""" + code_gen_dict = {} + + # Identify the module names + code_gen_dict["$MODULE_NAME$"] = [self.get_verilog_top_module_name()] + code_gen_dict["$MODULE_NAME_AXI$"] = [self.get_verilog_top_module_name() + "_axi"] + code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [self.get_verilog_top_module_name() + "_axi_wrapper"] + # Set the top module name - AXI wrapper + code_gen_dict["$TOP_MODULE$"] = code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] + + # Identify the module variables + output_data_type = self.get_nodeattr("outputDataType") # output precision + input_data_type = self.get_nodeattr("inputDataType") # input/threshold precision + num_channels = self.get_nodeattr("NumChannels") # number of channels + bias = self.get_nodeattr("activation_bias") # activation bias value + + code_gen_dict["$N$"] = [self.conv_datatype_to_str(output_data_type)] # output precision + code_gen_dict["$M$"] = [self.conv_datatype_to_str(input_data_type)] # input/threshold precision + code_gen_dict["$C$"] = [self.conv_datatype_to_str(num_channels)] # number of channels + code_gen_dict["$BIAS$"] = [self.conv_datatype_to_str(bias)] # activation bias value + + # Is the input datatype signed or unsigned? 
The thresholding core needs to know this + if self.get_input_datatype().min() < 0: + code_gen_dict["$SIGN$"] = ["signed"] + else: + code_gen_dict["$SIGN$"] = ["unsigned"] + + return code_gen_dict + + def get_rtl_file_list(self): + return ["thresholding.sv", + "thresholding_axi.sv", + "thresholding_axi_wrapper.v"] + + def get_rtl_file_paths(self): + rtl_root_dir = os.environ["FINN_ROOT"] + "/finn-rtllib/thresholding/hdl/" + rtl_file_list = self.get_rtl_file_list() + rtl_file_paths = [rtl_root_dir + file for file in rtl_file_list] + return rtl_file_paths + + def get_rtl_template_data(self, path): + with open(path, "r") as f: + template = f.read() + return template + + def fill_in_rtl_template_data(self, replace_dict, template_data): + template_data_cp = template_data + for key in replace_dict: + replacement_line = "\n".join(replace_dict[key]) + template_data_cp = template_data_cp.replace(key, replacement_line) + return template_data_cp + + def dump_rtl_data(self, dest_dir, filename, data): + with open(os.path.join(dest_dir, filename), "w") as f: + f.write(data) + return + + def generate_hdl(self): + # Generate a dictionary of values to put in RTL template + code_gen_dict = self.prepare_codegen_rtl_values() + + # Retrieve the destination directory for the final RTL files + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + + for rtl_file_path in self.get_rtl_file_paths(): + # read in original RTL template file + template_data = self.get_rtl_template_data(rtl_file_path) + # apply code generation to templates + data = self.fill_in_rtl_template_data(code_gen_dict, template_data) + # dump filled-in template to destination directory for compilation + file_only_path = rtl_file_path.split('/')[-1] + self.dump_rtl_data(code_gen_dir, file_only_path, data) + + # Before we return - set the 'gen_top_module' attribute for use later by PyVerilator and IPI generation + self.set_nodeattr("gen_top_module", code_gen_dict["$TOP_MODULE$"][0]) + return + + def 
code_generation_ipgen(self, model, fpgapart, clk): + self.generate_hdl() + + # set ipgen_path and ip_path so that HLS-Synth transformation + # and stich_ip transformation do not complain + # i.e. during the HLSSynthIP() transformation + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + self.set_nodeattr("ipgen_path", code_gen_dir) + self.set_nodeattr("ip_path", code_gen_dir) + def generate_params(self, model, path): return From 84704edd5aa7e53351819238f96d4c63dfb45d07 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 09:45:29 +0000 Subject: [PATCH 020/111] [thresholding] add method get_weightstream_width() Signed-off-by: Fionn O'Donohoe --- .../custom_op/fpgadataflow/thresholding_binary_search.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index d546d52843..54fa2def1e 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -131,7 +131,14 @@ def get_outstream_width(self): return 0 def get_weightstream_width(self): - return 0 + # Only 'decoupled' mode is supported + mem_mode = self.get_nodeattr("mem_mode") + if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode)) + pe = self.get_nodeattr("PE") + wp = self.get_weight_datatype().bitwidth() + n_thres_steps = self.get_nodeattr("numSteps") + w_width = pe * wp * n_thres_steps + return w_width def get_folded_input_shape(self): fold = self.calc_tmem() From 9aa7ff3f8c1a0584afd8684e9280d77aada43105 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 09:48:56 +0000 Subject: [PATCH 021/111] [thresholding] add method get_in/output_width() Signed-off-by: Fionn O'Donohoe --- .../custom_op/fpgadataflow/thresholding_binary_search.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff 
--git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 54fa2def1e..a1b75b3de1 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -125,10 +125,12 @@ def minimize_accumulator_width(self, model): return None def get_instream_width(self): - return 0 + i_bits = self.get_input_datatype().bitwidth() + return i_bits * self.get_nodeattr("PE") def get_outstream_width(self): - return 0 + o_bits = self.get_output_datatype().bitwidth() + return o_bits * self.get_nodeattr("PE") def get_weightstream_width(self): # Only 'decoupled' mode is supported From 608b5da9222e2ede4792c487dc4d77fb5ef02e16 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 09:51:10 +0000 Subject: [PATCH 022/111] [thresholding] add method body for code_generation_ipi() Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index a1b75b3de1..4ca651be76 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -289,7 +289,32 @@ def execute_node(self, context, graph): return def code_generation_ipi(self): - return [] + """Constructs and returns the TCL commands for node instantiation as an RTL block.""" + cmd = [] + rtl_file_list = self.get_rtl_file_list() + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + + for rtl_file in rtl_file_list: + cmd.append("add_files -norecurse %s" + % ( + os.path.join( + code_gen_dir, rtl_file + ) + )) + + # Create an RTL block, not an IP core (-type ip) + cmd.append("create_bd_cell -type module -reference %s %s" + % (self.get_nodeattr("gen_top_module"), 
self.onnx_node.name)) + + # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between + # /Thresholding_Binary_Search_0/s_axis(100000000 and /StreamingFIFO_0/out_V(200000000.000000) + cmd.append("set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/s_axis]") + + # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between + # /StreamingFIFO_1/in0_V(200000000.000000) and /Thresholding_Binary_Search_0/m_axis(100000000) + cmd.append("set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/m_axis]") + + return cmd def global_includes(self): pass From ca6e7e745c4ad810ac824ee3b6ccd55bdb6f724d Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 09:56:01 +0000 Subject: [PATCH 023/111] [thresholding] add method get_verilog_top_module_intf_names() Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 4ca651be76..5dac98ad66 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -316,6 +316,28 @@ def code_generation_ipi(self): return cmd + def get_verilog_top_module_intf_names(self): + """Return a dict of names of input and output interfaces. + The keys reflect the protocols each interface implements: + 'clk', 'rst', 'm_axis', 's_axis', 'aximm', 'axilite'. + Values are lists of tuples (axis, aximm) or names (axilite): + 'axis' tuples correspond to the list of node inputs in order, + each tuple is (interface_name, interface_width_bits). + axilite always assumed to be 32 bits and is not tuple (name only). 
+ Each block must have at most one aximm and one axilite.""" + + intf_names = super().get_verilog_top_module_intf_names() + # Only 'decoupled' mode is supported - check before adding axilite interface + mem_mode = self.get_nodeattr("mem_mode") + if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode)) + intf_names["axilite"] = ["s_axilite"] + intf_names["s_axis"] = [["s_axis"]] + intf_names["m_axis"] = [["m_axis"]] + + self.set_nodeattr("runtime_writeable_weights", 1) + + return intf_names + def global_includes(self): pass From 7266ee91af50a149d1d8310401e2a4134cdac18c Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 10:41:14 +0000 Subject: [PATCH 024/111] [thresholding] retrieve axilite write sequence for runtime weight programming Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 5dac98ad66..07b675f0f3 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -338,6 +338,59 @@ def get_verilog_top_module_intf_names(self): return intf_names + def find_next_power_of_2(self, n): + # Negative values will loop infinitely below - return 0 + if n <= 0: + return 0 + # If '1' is requested, output will be '0' in the loop below, so avoid this earlier. + elif n == 1: + return 2 # i.e. 
2**1 + + # decrement 'n' (to handle cases when `n` itself is a power of 2) + n = n - 1 + + # loop until only one bit is left + while n & n - 1: + # unset rightmost bit + n = n & n - 1 + return n << 1 + + def twos_comp(self, val, bitwidth): + return (val + (1 << bitwidth)) % (1 << bitwidth) + + def prep_axilite_val(self, val): + return self.twos_comp(int(val), self.get_weight_datatype().bitwidth()) + + def get_dynamic_config(self, model, address_stride=1): + ## TODO - not sure this description is correct + """Returns a configuration dictionary containing axilite write commands + in order to program the thresholds into the RTL core during runtime. + The default address stride for the weights is 1 byte.""" + + thresholds = model.get_initializer(self.onnx_node.input[1]) + num_channels, num_weights_per_channel = thresholds.shape + + weight_addr_boundary = self.find_next_power_of_2(num_weights_per_channel) + # Make sure that the next power of 2 (output) is greater than the input + assert weight_addr_boundary >= num_weights_per_channel + + config = {} + channel_cntr = 0 + for channel in thresholds: + channel_start_addr = (channel_cntr * weight_addr_boundary * address_stride) + weight_cntr = 0 + addr = 0 + for weight in channel: + key_name = "{}_{}{}_{}{}".format("axilite", "ch", str(channel_cntr), "w", str(weight_cntr)) + config[key_name] = (channel_start_addr + addr, self.prep_axilite_val(weight)) + + weight_cntr += 1 + addr += address_stride + + channel_cntr += 1 + + return config + def global_includes(self): pass From f88bdbfeb4ade334740d29fa81f6a83174635ad2 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 11:06:39 +0000 Subject: [PATCH 025/111] [thresholding] add methods for creating weight files for each simulation type Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 130 +++++++++++++++++- 1 file changed, 128 insertions(+), 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py 
b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 07b675f0f3..6ed07287ab 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -27,9 +27,17 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os +import numpy as np from qonnx.core.datatype import DataType - +from qonnx.util.basic import ( + interleave_matrix_outer_dim_from_partitions, + roundup_to_integer_multiple, +) +from finn.util.data_packing import ( + pack_innermost_dim_as_hex_string, +) from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +import warnings """@package thresholding_binary_search - ONNX i/o tensor shape assumptions for Thresholding: @@ -172,6 +180,63 @@ def get_exp_cycles(self): def get_template_param_values(self): return dict() + def get_hls_compatible_threshold_tensor(self, orig_thres_matrix): + """Convert the original numpy weight matrix orig_weight_matrix into + a form suitable for passing to the hlslib call: + * ensure MH % PE == 0 + * for unsigned inputs, ensure thresholds are positive + * interleave rows between PEs + * reshape into (PE, TMEM, n_thres_steps) and return + """ + mh = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + tmem = mh // pe + assert mh % pe == 0, "Requirement NumChannels divisable by PE is violated." 
+ assert ( + orig_thres_matrix.ndim == 2 + ), """Threshold matrix dimension is + not as expected (2).""" + n_thres_steps = orig_thres_matrix.shape[1] + assert n_thres_steps == self.get_nodeattr( + "numSteps" + ), "Mismatch in threshold steps" + if not self.get_input_datatype().signed(): + # ensure all thresholds are nonnegative + assert (orig_thres_matrix >= 0).all() + # ensure all thresholds are integer + assert np.equal( + np.mod(orig_thres_matrix, 1), 0 + ).all(), "Need int threshold tensor" + ret = orig_thres_matrix + # workaround for vivado_hls threshold bug + if ret[0][0] == 0 and n_thres_steps == 1: + ret = np.copy(ret) + ret[0][0] = 1 + warnings.warn( + "Setting 0-valued first threshold to 1 to avoid vivado_hls bug" + ) + # ensure channels = mh , duplicating if necessary + if ret.shape[0] == 1: + ret = np.tile(ret, (mh, 1)) + assert ( + ret.shape[0] == mh + ), "Channels of threshold matrix are not as expected (mh)" + # distribute rows between PEs + ret = interleave_matrix_outer_dim_from_partitions(ret, pe) + assert ( + ret.shape[0] == pe + ), """First dimension after distribution of the + rows between PEs is not as expected (pe)""" + assert ( + ret.shape[1] == tmem + ), """Second dimension after distribution of the + rows between PEs is not as expected (tmem)""" + assert ( + ret.shape[2] == n_thres_steps + ), """Third dimension after distribution of the + rows between PEs is not as expected (n_thres_steps)""" + return ret.reshape(1, pe, tmem, n_thres_steps) + def make_weight_file(self, weights, weight_file_mode, weight_file_name): """Produce a file containing given weights (thresholds) in appropriate format for this layer. 
This file can be used for either synthesis or @@ -183,7 +248,68 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): decoupled_runtime} * weight_file_name : filename for the weight file to be generated """ - return + # There are 'decoupled_*' flavors, just make sure that the flavors are decoupled related + if "decoupled" not in weight_file_mode: raise Exception("Unrecognized memory mode for this node: {}".format(weight_file_mode)) + + threshold_tensor = self.get_hls_compatible_threshold_tensor(weights) + tdt = self.get_weight_datatype() + assert np.vectorize(tdt.allowed)( + threshold_tensor + ).all(), "Thresholds can't be expressed with type %s" % str(tdt) + + # streaming thresholds need to be organized differently + # (1, pe, tmem, n_thres_steps) -> (1, tmem, pe, n_thres_steps) + decoupled_thres = np.transpose(threshold_tensor, (0, 2, 1, 3)) + # (1, tmem, pe, n_thres_steps) -(1, tmem, pe * n_thres_steps) + pe = self.get_nodeattr("PE") + n_thres_steps = self.get_nodeattr("numSteps") + decoupled_thres_pe_flipped = np.flip(decoupled_thres, axis=-2) + decoupled_thres = decoupled_thres.reshape(1, -1, pe * n_thres_steps) + decoupled_thres = decoupled_thres.copy() + decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.reshape( + 1, -1, pe * n_thres_steps + ) + decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.copy() + + if weight_file_mode == "decoupled_npy": + # save weight stream into npy for cppsim + np.save(weight_file_name, decoupled_thres) + elif weight_file_mode == "decoupled_verilog_dat": + # convert weight values into hexstring + weight_width = self.get_weightstream_width() + # pad to nearest 4 bits to get hex strings + weight_width_padded = roundup_to_integer_multiple(weight_width, 4) + weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string( + decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix="" + ) + weight_stream = weight_tensor_pe_flipped.flatten() + weight_stream = weight_stream.copy() + with 
open(weight_file_name, "w") as f: + for val in weight_stream: + f.write(val + "\n") + elif weight_file_mode == "decoupled_runtime": + # memstream axi-lite interface will map each mem line to + # one or multiple 32-bit words + weight_width = self.get_weightstream_width() + words_per_memwidth = 2 ** ceil(log2(weight_width / 32)) + if words_per_memwidth < 1: + words_per_memwidth = 1 + weight_width_padded = words_per_memwidth * 32 + # first, pack and ensure padding to 32 bits + weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string( + decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix="" + ) + weight_stream = weight_tensor_pe_flipped.flatten() + weight_stream = weight_stream.copy() + with open(weight_file_name, "w") as f: + for val in weight_stream: + # split into groups of 8 hex digits (= 32 bits) + words_32b = textwrap.wrap(val, 8) + words_32b.reverse() + for word_32b in words_32b: + f.write(word_32b + "\n") + else: + raise Exception("Decoupled weight export not yet implemented") # Get the integer from the DataType and string-ify it # This assumes that the data is in the form "INTx" or similar From 560771a1b87a6f25dd2274232be55d86b350f74b Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 11:08:53 +0000 Subject: [PATCH 026/111] [thresholding] add method generate_params() Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 6ed07287ab..ff9f5f4875 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -408,7 +408,45 @@ def code_generation_ipgen(self, model, fpgapart, clk): self.set_nodeattr("ipgen_path", code_gen_dir) self.set_nodeattr("ip_path", code_gen_dir) + # Generate params for RTLSim + self.generate_params(model, 
code_gen_dir) + def generate_params(self, model, path): + # Only 'decoupled' mode is supported + mem_mode = self.get_nodeattr("mem_mode") + if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode)) + + code_gen_dir = path + weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir) + thresholds = model.get_initializer(self.onnx_node.input[1]) + self.make_weight_file(thresholds, "decoupled_npy", weight_filename_sim) + + # Verilog.dat thresholds: + # also save weights as Verilog .dat file + # note that we provide two different .dat files, one for synth + # and one for synthesis. this is because URAM-based weights always + # need zero weights for synthesis, otherwise they get inferred + # as BRAM + weight_filename_rtl_synth = "{}/memblock_synth_0.dat".format(code_gen_dir) + weight_filename_rtl_sim = "{}/memblock_sim_0.dat".format(code_gen_dir) + # sim weights are always the true weights + self.make_weight_file( + thresholds, "decoupled_verilog_dat", weight_filename_rtl_sim + ) + + # Synthesis thresholds: + ram_style = self.get_nodeattr("ram_style") + if ram_style == "ultra": + # UltraRAM must have no memory initializer, or only zeroes + # otherwise BRAM will be inferred instead of URAM + # as a workaround we provide a zero-weight init here + synth_thresholds = np.zeros_like(thresholds, dtype=np.float32) + else: + synth_thresholds = thresholds + self.make_weight_file( + synth_thresholds, "decoupled_verilog_dat", weight_filename_rtl_synth + ) + return def execute_node(self, context, graph): From e763bf80773be4e362f9f9171a01bb4b9eb4dc8a Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 11:11:49 +0000 Subject: [PATCH 027/111] [thresholding] add method for preparing a Pyverilator object for RTL simulation Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git 
a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index ff9f5f4875..611a75992e 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -28,6 +28,7 @@ import os import numpy as np +import warnings from qonnx.core.datatype import DataType from qonnx.util.basic import ( interleave_matrix_outer_dim_from_partitions, @@ -37,7 +38,12 @@ pack_innermost_dim_as_hex_string, ) from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp -import warnings +from finn.util.basic import make_build_dir, get_rtlsim_trace_depth + +try: + from pyverilator import PyVerilator +except ModuleNotFoundError: + PyVerilator = None """@package thresholding_binary_search - ONNX i/o tensor shape assumptions for Thresholding: @@ -449,6 +455,31 @@ def generate_params(self, model, path): return + def prepare_rtlsim(self): + """Creates a Verilator emulation library for the RTL code generated + for this node, sets the rtlsim_so attribute to its path and returns + a PyVerilator wrapper around it.""" + + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + verilog_paths = [code_gen_dir] + verilog_files = self.get_rtl_file_list() + + # build the Verilator emulation library + sim = PyVerilator.build( + verilog_files, + build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), + verilog_path=verilog_paths, + trace_depth=get_rtlsim_trace_depth(), + top_module_name=self.get_nodeattr("gen_top_module"), + ) + + # save generated lib filename in attribute + self.set_nodeattr("rtlsim_so", sim.lib._name) + return sim + def execute_node(self, context, graph): return From 84e08f18a031dbfacec6a11b980c09885552efdf Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 11:14:47 +0000 Subject: [PATCH 028/111] [thresholding] add 
method to run rtlsim on a thresholding binary search simulation object Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 611a75992e..4c7c67af72 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -35,6 +35,8 @@ roundup_to_integer_multiple, ) from finn.util.data_packing import ( + npy_to_rtlsim_input, + rtlsim_output_to_npy, pack_innermost_dim_as_hex_string, ) from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp @@ -481,6 +483,83 @@ def prepare_rtlsim(self): return sim def execute_node(self, context, graph): + # Perform input checks + if self.get_nodeattr("exec_mode") != "rtlsim": raise Exception("Invalid exec_mode value: {}; exec_mode must be set to 'rtlsim'".format(self.get_nodeattr("exec_mode"))) + if self.get_nodeattr("mem_mode") != "decoupled": raise Exception("Invalid mem_mode value: {}; mem_mode must be set to 'decoupled'".format(self.get_nodeattr("mem_mode"))) + + node = self.onnx_node + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + + # create a npy file fore each input of the node (in_ind is input index) + in_ind = 0 + for inputs in node.input: + # it is assumed that the first input of the node is the data input + # the second input are the weights + # the third input are the thresholds + if in_ind == 0: + assert ( + str(context[inputs].dtype) == "float32" + ), """Input datatype is + not float32 as expected.""" + expected_inp_shape = self.get_folded_input_shape() + reshaped_input = context[inputs].reshape(expected_inp_shape) + + if self.get_input_datatype() == DataType["BIPOLAR"]: + # store bipolar activations as binary + reshaped_input = (reshaped_input + 1) / 2 + export_idt = DataType["BINARY"] + else: + export_idt = 
self.get_input_datatype() + + # make copy before saving the array + reshaped_input = reshaped_input.copy() + np.save( + os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), + reshaped_input, + ) + elif in_ind > 2: + raise Exception("Unexpected input found for Thresholding_Batch") + in_ind += 1 + + # Create a PyVerilator wrapper of the RTLSim .so + sim = self.get_rtlsim() + nbits = self.get_instream_width() + inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + + super().reset_rtlsim(sim) + super().toggle_clk(sim) + + wnbits = self.get_weightstream_width() + export_wdt = self.get_weight_datatype() + wei = npy_to_rtlsim_input( + "{}/thresholds.npy".format(code_gen_dir), export_wdt, wnbits + ) + num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) + io_dict = { + "inputs": {"in0": inp, "weights": wei * num_w_reps}, + "outputs": {"s_axis": []}, + } + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] + + # Manage output data + odt = self.get_output_datatype() + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + + rtlsim_output_to_npy( + output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + + # load and reshape output + output = np.load(out_npy_path) + oshape = self.get_normal_output_shape() + output = np.asarray([output], dtype=np.float32).reshape(*oshape) + context[node.output[0]] = output return def code_generation_ipi(self): From b0be07adb8e2bb0ab5005169ff0f878efc5c7c80 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 11:16:33 +0000 Subject: [PATCH 029/111] [thresholding] add stubbed method for ipgen_singlenode_code() Signed-off-by: Fionn O'Donohoe --- .../custom_op/fpgadataflow/thresholding_binary_search.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py 
b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 4c7c67af72..19140a0090 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -665,6 +665,13 @@ def get_dynamic_config(self, model, address_stride=1): return config + def ipgen_singlenode_code(self): + """Normally: Builds the bash script for IP generation.""" + """This is needed for the HLSSynthIP() transformation. + This is an IP, not a HLS node, so therefore provide an empty hook + to prevent any HLS synthesis.""" + pass + def global_includes(self): pass From 30d22f88a40864257a97f7e9e9ff84f25c1bc32e Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 16 Nov 2022 13:51:10 +0000 Subject: [PATCH 030/111] [thresholding] update class name to a more consistent naming convention Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/__init__.py | 4 ++-- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 65fbd6e20c..dc9a5a349a 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -59,7 +59,7 @@ from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch from finn.custom_op.fpgadataflow.thresholding_binary_search import ( - Thresholding_Bin_Search, + Thresholding_Binary_Search, ) from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour_Batch @@ -82,7 +82,7 @@ custom_op["Pool_Batch"] = Pool_Batch custom_op["FMPadding_Batch"] = FMPadding_Batch custom_op["Thresholding_Batch"] = Thresholding_Batch -custom_op["Thresholding_Binary_search"] = Thresholding_Bin_Search 
+custom_op["Thresholding_Binary_Search"] = Thresholding_Binary_Search custom_op["AddStreams_Batch"] = AddStreams_Batch custom_op["LabelSelect_Batch"] = LabelSelect_Batch custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 19140a0090..9bf36283da 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -59,7 +59,7 @@ """ -class Thresholding_Bin_Search(HLSCustomOp): +class Thresholding_Binary_Search(HLSCustomOp): """Class that corresponds to finn-rtllib 'thresholding' function.""" def __init__(self, onnx_node): From 3594edddf51f8a13053a6ad99e179d081e15d8d4 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 17 Nov 2022 09:54:46 +0000 Subject: [PATCH 031/111] [thresholding] add fpgadataflow pytests for thresholding binary search node Signed-off-by: Fionn O'Donohoe --- ...fpgadataflow_thresholding_binary_search.py | 417 ++++++++++++++++++ 1 file changed, 417 insertions(+) create mode 100755 tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py new file mode 100755 index 0000000000..0a02503300 --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -0,0 +1,417 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pytest +import numpy as np +from onnx import TensorProto, helper +from pyverilator.util.axi_utils import axilite_write, reset_rtlsim +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.custom_op.general.multithreshold import multithreshold +from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.general import GiveUniqueNodeNames +from qonnx.util.basic import gen_finn_dt_tensor + +from finn.core.rtlsim_exec import rtlsim_exec +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode + +test_fpga_part = "xczu3eg-sbva484-1-e" +target_clk_ns = 5 + +# Helper functions +def sort_thresholds_increasing(thresholds): + return np.sort(thresholds, axis=1) + +def generate_random_threshold_values(input_data_type, num_input_channels, num_steps): + return np.random.randint(input_data_type.min(), input_data_type.max() + 1, (num_input_channels, num_steps)).astype(np.float32) + +def generate_pe_value(fold, num_input_channels): + if fold == -1: + fold = num_input_channels + pe = num_input_channels // fold + assert num_input_channels % pe == 0 + return pe + +# n = batch, c = channel, h = height, w = width of feature map +# Standard = NCHW; FINN = NHWC +# Convert from NCHW to NHWC +def convert_np_array_to_finn_data_layout(data): + return np.transpose(data, (0, 2, 3, 1)) + +# n = batch, c = channel, h = height, w = width of feature map +# Standard = NCHW; FINN = NHWC +# Convert from NHWC to 
NCHW +def convert_np_array_to_standard_data_layout(data): + return np.transpose(data, (0, 3, 1, 2)) + +def make_single_thresholding_binary_search_modelwrapper( + thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs +): + NumChannels = thresholds.shape[0] + + inp = helper.make_tensor_value_info( + "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels] + ) + outp = helper.make_tensor_value_info( + "outp", TensorProto.FLOAT, num_input_vecs + [NumChannels] + ) + + node_inp_list = ["inp", "thresh"] + + Thresholding_node = helper.make_node( + "Thresholding_Binary_Search", + node_inp_list, + ["outp"], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + NumChannels=NumChannels, + PE=pe, + numSteps=thresholds.shape[1], + inputDataType=input_data_type.name, + weightDataType=input_data_type.name, + outputDataType=output_data_type.name, + activation_bias=activation_bias, + mem_mode=mem_mode, + numInputVectors=num_input_vecs, + ) + graph = helper.make_graph( + nodes=[Thresholding_node], + name="thresholding_graph", + inputs=[inp], + outputs=[outp], + ) + + model = helper.make_model(graph, producer_name="thresholding-model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", input_data_type) + model.set_tensor_datatype("outp", output_data_type) + + model.set_tensor_datatype("thresh", input_data_type) + model.set_initializer("thresh", thresholds) + return model + +# Test brief: a particular method for this class was causing a bug - find_next_power_of_2() +# Weights in the thresholding core are programmed on a per-channel basis and are byte-addressable. +# When a channel is programmed, the next channel can start programming at the next power-of-2 byte boundary. +# This test is to show that the function that calculates that boundary is working correctly. 
+# +# A Thresholding_Binary_Search layer was created and a SW generated dataset with a threshold channel +# depth of 1 weight (1 layer of N channels in the thresholding core). However, find_next_power_of_2() +# was returning a next-power-of-2 address boundary at address '0', instead of '2'. This unit test +# is to prove that this bug no longer occurs. It was originally seen when the input datatype +# was 'DataType["BIPOLAR"]'. +@pytest.mark.tbs_unit +@pytest.mark.tbs_all +def test_fpgadataflow_thresholding_binary_search_unit(): + activation = DataType["BIPOLAR"] + input_data_type = DataType["INT16"] + fold = -1 + num_input_channels = 16 + mem_mode = "decoupled" + + # Handle inputs to the test + pe = generate_pe_value(fold, num_input_channels) + num_steps = activation.get_num_possible_values() - 1 + + # Other non-input parameters + num_input_vecs = [1, 2, 2] + output_data_type = activation + if output_data_type == DataType["BIPOLAR"]: + activation_bias = 0 + else: + activation_bias = output_data_type.min() + + # Generate random thresholds and sort in ascending order + thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + + # Generate model from input parameters to the test + model = make_single_thresholding_binary_search_modelwrapper( + thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + ) + + # Retrieve the class to get the method-under-test + tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0] + tbs_inst = getCustomOp(tbs_node) + + test_vector = [ + {"input": -2, "expected_result": 0}, + {"input": -1, "expected_result": 0}, + {"input": 0, "expected_result": 0}, + {"input": 1, "expected_result": 2}, + {"input": 2, "expected_result": 2}, + {"input": 3, "expected_result": 4}, + {"input": 4, "expected_result": 4}, + {"input": 7, "expected_result": 8}, + {"input": 8, "expected_result": 8}, + {"input": 11, "expected_result": 16}, + {"input": 15, "expected_result": 
16}, + {"input": 16, "expected_result": 16}, + {"input": 18, "expected_result": 32}, + {"input": 27, "expected_result": 32}, + {"input": 31, "expected_result": 32}, + {"input": 32, "expected_result": 32}, + {"input": 42, "expected_result": 64}, + {"input": 65, "expected_result": 128}, + ] + + for test_dict in test_vector: + output = tbs_inst.find_next_power_of_2(test_dict["input"]) + assert output >= test_dict["input"] + assert output == test_dict["expected_result"] + + return + +# Test brief: Prove that cppsim is not supported for this class +@pytest.mark.tbs_cppsim +@pytest.mark.tbs_all +def test_fpgadataflow_thresholding_binary_search_cppsim(): + input_data_type = DataType["UINT16"] + act = DataType["BIPOLAR"] + fold = -1 + num_input_channels = 16 + mem_mode = "decoupled" # 'const' is unsupported - see test_fpgadataflow_thresholding_binary_search_const_mem_mode + + pe = generate_pe_value(fold, num_input_channels) + num_steps = act.get_num_possible_values() - 1 + + # Generate random, non-decreasing thresholds + thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + + # make the vivado_hls threshold bug appear (incorrect rtlsim result when first + # threshold of first channel is zero, while using BIPOLAR output) + if act == DataType["BIPOLAR"]: + thresholds[0][0] = 0 + thresholds = sort_thresholds_increasing(thresholds) + + # Other non-input parameters + num_input_vecs = [1, 2, 2] + output_data_type = act + if output_data_type == DataType["BIPOLAR"]: + activation_bias = 0 + else: + activation_bias = output_data_type.min() + + # Generate model from input parameters to the test + model = make_single_thresholding_binary_search_modelwrapper( + thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + ) + + # Cppsim is not supported for this class, catch the specific exception thrown by cppsim + # Exception raised in cppsim: Custom op_type Thresholding_Binary_Search is currently not 
supported. + try: + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + except Exception as e: + if str(e) != "Custom op_type Thresholding_Binary_Search is currently not supported.": + raise + +# Test brief: Prove that memory mode 'const' is not supported for this layer type +@pytest.mark.tbs_const +@pytest.mark.tbs_all +def test_fpgadataflow_thresholding_binary_search_const_mem_mode(): + input_data_type = DataType["INT16"] + activation = DataType["INT4"] + fold = -1 + num_input_channels = 16 + mem_mode = "const" + + pe = generate_pe_value(fold, num_input_channels) + num_input_vecs = [1, 2, 2] + output_data_type = activation + activation_bias = output_data_type.min() + + # Generate random thresholds and sort in ascending order + num_steps = activation.get_num_possible_values() - 1 + thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + + # Generate model from input parameters to the test + model = make_single_thresholding_binary_search_modelwrapper( + thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + ) + + # Prove that 'const' memory mode is not supported for this class + # 'const' memory mode is not supported for this class, catch the specific exception thrown by FINN + # Exception: ('Unrecognized memory mode for this node:', 'const') + try: + model = model.transform(InsertFIFO(True)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) + except Exception as e: + if str(e) != "Unrecognized memory mode for this node: {}".format(mem_mode): + raise + # Caught the expected exception, leave the test early + return + +# Test brief: Test that PrepareRTLSim() runs successfully. 
This function is not +# tested in test_fpgadataflow_thresholding_binary_search() +@pytest.mark.tbs_prep_rtlsim +@pytest.mark.tbs_all +def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): + input_data_type = DataType["INT16"] + act = DataType["INT4"] + fold = -1 + num_input_channels = 16 + mem_mode = "decoupled" + + # Handle inputs to the test + pe = generate_pe_value(fold, num_input_channels) + num_steps = act.get_num_possible_values() - 1 + + # Generate random, non-decreasing thresholds + thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + # make the vivado_hls threshold bug appear (incorrect rtlsim result when first + # threshold of first channel is zero, while using BIPOLAR output) + if act == DataType["BIPOLAR"]: + thresholds[0][0] = 0 + thresholds = sort_thresholds_increasing(thresholds) + + # Other non-input parameters + num_input_vecs = [1, 2, 2] + output_data_type = act + if output_data_type == DataType["BIPOLAR"]: + activation_bias = 0 + else: + activation_bias = output_data_type.min() + + # Generate model from input parameters to the test + model = make_single_thresholding_binary_search_modelwrapper( + thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + ) + + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(PrepareRTLSim()) + return + +# Test brief: Create a Thresholding binary search layer using various parameters +# and test against a SW generated & simulated dataset +# N.B. - fold factor of '-1' is supported only (no PE/SIMD support) +@pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]]) +@pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) +@pytest.mark.parametrize("fold", [-1]) # 1, 2, etc. 
will fail +@pytest.mark.parametrize("num_input_channels", [16]) +# no need to test 'const' mode, it's already done in test_fpgadataflow_thresholding_binary_search_const_mem_mode() +@pytest.mark.parametrize("mem_mode", ["decoupled"]) +@pytest.mark.tbs_soak +@pytest.mark.tbs_all +def test_fpgadataflow_thresholding_binary_search(activation, input_data_type, fold, num_input_channels, mem_mode): + # Handle inputs to the test + pe = generate_pe_value(fold, num_input_channels) + num_steps = activation.get_num_possible_values() - 1 + + # Other non-input parameters + num_input_vecs = [1, 2, 2] + output_data_type = activation + if output_data_type == DataType["BIPOLAR"]: + activation_bias = 0 + else: + activation_bias = output_data_type.min() + + # generate random input data + tensor_shape = tuple(num_input_vecs + [num_input_channels]) + x = gen_finn_dt_tensor(input_data_type, tensor_shape) + + # Generate random thresholds and sort in ascending order + thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + + # make the vivado_hls threshold bug appear (incorrect rtlsim result when first + # threshold of first channel is zero, while using BIPOLAR output) + if activation == DataType["BIPOLAR"]: + thresholds[0][0] = 0 + + # provide non-decreasing/ascending thresholds + thresholds = sort_thresholds_increasing(thresholds) + + x_nhwc = convert_np_array_to_standard_data_layout(x) + y = multithreshold(x_nhwc, thresholds) + + # convert back to NHWC for comparison to hw outputs + y = convert_np_array_to_finn_data_layout(y) + if activation == DataType["BIPOLAR"]: + # binary to bipolar + y = 2 * y - 1 + else: + # signed offset + y += activation.min() + + # Generate model from input parameters to the test + model = make_single_thresholding_binary_search_modelwrapper( + thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + ) + + model = model.transform(InsertFIFO(True)) + model = 
model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) + + # Retrieve the axilite programming sequence for the weights - for decoupled mode only + tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0] + tbs_inst = getCustomOp(tbs_node) + config = tbs_inst.get_dynamic_config(model) + + # Reshape generated data (not from model) + oshape = model.get_tensor_shape("outp") + y_expected = y.reshape(oshape) + + # Helper function that delivers the hook to program the thresholds via AXI-Lite + def config_hook(config): + if config is None: + return None + + def write_thresh_config(sim): + # axi_name = "s_axilite_0_" # works + axi_name = getCustomOp(model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]).get_verilog_top_module_intf_names()['axilite'][0] + axi_name += "_0_" + + # 1. Write config registers to the Threshold memory, dict defines (addr, value) tuples + for config_entry in config.values(): + addr = config_entry[0] + val = config_entry[1] + axilite_write(sim, addr, val, basename=axi_name) + + reset_rtlsim(sim) + return write_thresh_config + + input_dict = {"inp": x} + rtlsim_exec(model, input_dict, pre_hook=config_hook(config)) + y_produced = input_dict["outp"] + assert (y_produced == y_expected).all() From 0bee70d5e4bc5fd163b8cf8a84931ac709aaac35 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 17 Nov 2022 10:08:38 +0000 Subject: [PATCH 032/111] [thresholding] add linter fixes Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 121 ++++++++++++------ ...fpgadataflow_thresholding_binary_search.py | 103 ++++++++++++--- 2 files changed, 168 insertions(+), 56 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 9bf36283da..b785abcaa8 100755 --- 
a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -26,21 +26,22 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import os import numpy as np +import os import warnings from qonnx.core.datatype import DataType from qonnx.util.basic import ( interleave_matrix_outer_dim_from_partitions, roundup_to_integer_multiple, ) + +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +from finn.util.basic import get_rtlsim_trace_depth, make_build_dir from finn.util.data_packing import ( npy_to_rtlsim_input, - rtlsim_output_to_npy, pack_innermost_dim_as_hex_string, + rtlsim_output_to_npy, ) -from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp -from finn.util.basic import make_build_dir, get_rtlsim_trace_depth try: from pyverilator import PyVerilator @@ -151,7 +152,10 @@ def get_outstream_width(self): def get_weightstream_width(self): # Only 'decoupled' mode is supported mem_mode = self.get_nodeattr("mem_mode") - if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode)) + if mem_mode != "decoupled": + raise Exception( + "Unrecognized memory mode for this node: {}".format(mem_mode) + ) pe = self.get_nodeattr("PE") wp = self.get_weight_datatype().bitwidth() n_thres_steps = self.get_nodeattr("numSteps") @@ -257,7 +261,10 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): * weight_file_name : filename for the weight file to be generated """ # There are 'decoupled_*' flavors, just make sure that the flavors are decoupled related - if "decoupled" not in weight_file_mode: raise Exception("Unrecognized memory mode for this node: {}".format(weight_file_mode)) + if "decoupled" not in weight_file_mode: + raise Exception( + "Unrecognized memory mode for this node: {}".format(weight_file_mode) + ) 
threshold_tensor = self.get_hls_compatible_threshold_tensor(weights) tdt = self.get_weight_datatype() @@ -334,21 +341,35 @@ def prepare_codegen_rtl_values(self): # Identify the module names code_gen_dict["$MODULE_NAME$"] = [self.get_verilog_top_module_name()] - code_gen_dict["$MODULE_NAME_AXI$"] = [self.get_verilog_top_module_name() + "_axi"] - code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [self.get_verilog_top_module_name() + "_axi_wrapper"] + code_gen_dict["$MODULE_NAME_AXI$"] = [ + self.get_verilog_top_module_name() + "_axi" + ] + code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [ + self.get_verilog_top_module_name() + "_axi_wrapper" + ] # Set the top module name - AXI wrapper code_gen_dict["$TOP_MODULE$"] = code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] # Identify the module variables - output_data_type = self.get_nodeattr("outputDataType") # output precision - input_data_type = self.get_nodeattr("inputDataType") # input/threshold precision - num_channels = self.get_nodeattr("NumChannels") # number of channels - bias = self.get_nodeattr("activation_bias") # activation bias value - - code_gen_dict["$N$"] = [self.conv_datatype_to_str(output_data_type)] # output precision - code_gen_dict["$M$"] = [self.conv_datatype_to_str(input_data_type)] # input/threshold precision - code_gen_dict["$C$"] = [self.conv_datatype_to_str(num_channels)] # number of channels - code_gen_dict["$BIAS$"] = [self.conv_datatype_to_str(bias)] # activation bias value + output_data_type = self.get_nodeattr("outputDataType") # output precision + input_data_type = self.get_nodeattr( + "inputDataType" + ) # input/threshold precision + num_channels = self.get_nodeattr("NumChannels") # number of channels + bias = self.get_nodeattr("activation_bias") # activation bias value + + code_gen_dict["$N$"] = [ + self.conv_datatype_to_str(output_data_type) + ] # output precision + code_gen_dict["$M$"] = [ + self.conv_datatype_to_str(input_data_type) + ] # input/threshold precision + code_gen_dict["$C$"] = [ + 
self.conv_datatype_to_str(num_channels) + ] # number of channels + code_gen_dict["$BIAS$"] = [ + self.conv_datatype_to_str(bias) + ] # activation bias value # Is the input datatype signed or unsigned? The thresholding core needs to know this if self.get_input_datatype().min() < 0: @@ -359,9 +380,7 @@ def prepare_codegen_rtl_values(self): return code_gen_dict def get_rtl_file_list(self): - return ["thresholding.sv", - "thresholding_axi.sv", - "thresholding_axi_wrapper.v"] + return ["thresholding.sv", "thresholding_axi.sv", "thresholding_axi_wrapper.v"] def get_rtl_file_paths(self): rtl_root_dir = os.environ["FINN_ROOT"] + "/finn-rtllib/thresholding/hdl/" @@ -399,7 +418,7 @@ def generate_hdl(self): # apply code generation to templates data = self.fill_in_rtl_template_data(code_gen_dict, template_data) # dump filled-in template to destination directory for compilation - file_only_path = rtl_file_path.split('/')[-1] + file_only_path = rtl_file_path.split("/")[-1] self.dump_rtl_data(code_gen_dir, file_only_path, data) # Before we return - set the 'gen_top_module' attribute for use later by PyVerilator and IPI generation @@ -422,7 +441,10 @@ def code_generation_ipgen(self, model, fpgapart, clk): def generate_params(self, model, path): # Only 'decoupled' mode is supported mem_mode = self.get_nodeattr("mem_mode") - if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode)) + if mem_mode != "decoupled": + raise Exception( + "Unrecognized memory mode for this node: {}".format(mem_mode) + ) code_gen_dir = path weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir) @@ -484,8 +506,18 @@ def prepare_rtlsim(self): def execute_node(self, context, graph): # Perform input checks - if self.get_nodeattr("exec_mode") != "rtlsim": raise Exception("Invalid exec_mode value: {}; exec_mode must be set to 'rtlsim'".format(self.get_nodeattr("exec_mode"))) - if self.get_nodeattr("mem_mode") != "decoupled": raise Exception("Invalid 
mem_mode value: {}; mem_mode must be set to 'decoupled'".format(self.get_nodeattr("mem_mode"))) + if self.get_nodeattr("exec_mode") != "rtlsim": + raise Exception( + "Invalid exec_mode value: {}; exec_mode must be set to 'rtlsim'".format( + self.get_nodeattr("exec_mode") + ) + ) + if self.get_nodeattr("mem_mode") != "decoupled": + raise Exception( + "Invalid mem_mode value: {}; mem_mode must be set to 'decoupled'".format( + self.get_nodeattr("mem_mode") + ) + ) node = self.onnx_node code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") @@ -569,24 +601,27 @@ def code_generation_ipi(self): code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") for rtl_file in rtl_file_list: - cmd.append("add_files -norecurse %s" - % ( - os.path.join( - code_gen_dir, rtl_file - ) - )) + cmd.append( + "add_files -norecurse %s" % (os.path.join(code_gen_dir, rtl_file)) + ) # Create an RTL block, not an IP core (-type ip) - cmd.append("create_bd_cell -type module -reference %s %s" - % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)) + cmd.append( + "create_bd_cell -type module -reference %s %s" + % (self.get_nodeattr("gen_top_module"), self.onnx_node.name) + ) # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between # /Thresholding_Binary_Search_0/s_axis(100000000 and /StreamingFIFO_0/out_V(200000000.000000) - cmd.append("set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/s_axis]") + cmd.append( + "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/s_axis]" + ) # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between # /StreamingFIFO_1/in0_V(200000000.000000) and /Thresholding_Binary_Search_0/m_axis(100000000) - cmd.append("set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/m_axis]") + cmd.append( + "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins 
Thresholding_Binary_Search_0/m_axis]" + ) return cmd @@ -603,7 +638,10 @@ def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() # Only 'decoupled' mode is supported - check before adding axilite interface mem_mode = self.get_nodeattr("mem_mode") - if mem_mode != "decoupled": raise Exception("Unrecognized memory mode for this node: {}".format(mem_mode)) + if mem_mode != "decoupled": + raise Exception( + "Unrecognized memory mode for this node: {}".format(mem_mode) + ) intf_names["axilite"] = ["s_axilite"] intf_names["s_axis"] = [["s_axis"]] intf_names["m_axis"] = [["m_axis"]] @@ -618,7 +656,7 @@ def find_next_power_of_2(self, n): return 0 # If '1' is requested, output will be '0' in the loop below, so avoid this earlier. elif n == 1: - return 2 # i.e. 2**1 + return 2 # i.e. 2**1 # decrement 'n' (to handle cases when `n` itself is a power of 2) n = n - 1 @@ -651,12 +689,17 @@ def get_dynamic_config(self, model, address_stride=1): config = {} channel_cntr = 0 for channel in thresholds: - channel_start_addr = (channel_cntr * weight_addr_boundary * address_stride) + channel_start_addr = channel_cntr * weight_addr_boundary * address_stride weight_cntr = 0 addr = 0 for weight in channel: - key_name = "{}_{}{}_{}{}".format("axilite", "ch", str(channel_cntr), "w", str(weight_cntr)) - config[key_name] = (channel_start_addr + addr, self.prep_axilite_val(weight)) + key_name = "{}_{}{}_{}{}".format( + "axilite", "ch", str(channel_cntr), "w", str(weight_cntr) + ) + config[key_name] = ( + channel_start_addr + addr, + self.prep_axilite_val(weight), + ) weight_cntr += 1 addr += address_stride diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 0a02503300..579b6fe83c 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -27,6 +27,7 @@ 
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import pytest + import numpy as np from onnx import TensorProto, helper from pyverilator.util.axi_utils import axilite_write, reset_rtlsim @@ -54,8 +55,14 @@ def sort_thresholds_increasing(thresholds): return np.sort(thresholds, axis=1) + def generate_random_threshold_values(input_data_type, num_input_channels, num_steps): - return np.random.randint(input_data_type.min(), input_data_type.max() + 1, (num_input_channels, num_steps)).astype(np.float32) + return np.random.randint( + input_data_type.min(), + input_data_type.max() + 1, + (num_input_channels, num_steps), + ).astype(np.float32) + def generate_pe_value(fold, num_input_channels): if fold == -1: @@ -64,20 +71,29 @@ def generate_pe_value(fold, num_input_channels): assert num_input_channels % pe == 0 return pe + # n = batch, c = channel, h = height, w = width of feature map # Standard = NCHW; FINN = NHWC # Convert from NCHW to NHWC def convert_np_array_to_finn_data_layout(data): return np.transpose(data, (0, 2, 3, 1)) + # n = batch, c = channel, h = height, w = width of feature map # Standard = NCHW; FINN = NHWC # Convert from NHWC to NCHW def convert_np_array_to_standard_data_layout(data): return np.transpose(data, (0, 3, 1, 2)) + def make_single_thresholding_binary_search_modelwrapper( - thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + mem_mode, + num_input_vecs, ): NumChannels = thresholds.shape[0] @@ -123,6 +139,7 @@ def make_single_thresholding_binary_search_modelwrapper( model.set_initializer("thresh", thresholds) return model + # Test brief: a particular method for this class was causing a bug - find_next_power_of_2() # Weights in the thresholding core are programmed on a per-channel basis and are byte-addressable. 
# When a channel is programmed, the next channel can start programming at the next power-of-2 byte boundary. @@ -155,11 +172,19 @@ def test_fpgadataflow_thresholding_binary_search_unit(): activation_bias = output_data_type.min() # Generate random thresholds and sort in ascending order - thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + thresholds = generate_random_threshold_values( + input_data_type, num_input_channels, num_steps + ) # Generate model from input parameters to the test model = make_single_thresholding_binary_search_modelwrapper( - thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + mem_mode, + num_input_vecs, ) # Retrieve the class to get the method-under-test @@ -194,6 +219,7 @@ def test_fpgadataflow_thresholding_binary_search_unit(): return + # Test brief: Prove that cppsim is not supported for this class @pytest.mark.tbs_cppsim @pytest.mark.tbs_all @@ -202,13 +228,15 @@ def test_fpgadataflow_thresholding_binary_search_cppsim(): act = DataType["BIPOLAR"] fold = -1 num_input_channels = 16 - mem_mode = "decoupled" # 'const' is unsupported - see test_fpgadataflow_thresholding_binary_search_const_mem_mode + mem_mode = "decoupled" # 'const' is unsupported - see test_fpgadataflow_thresholding_binary_search_const_mem_mode pe = generate_pe_value(fold, num_input_channels) num_steps = act.get_num_possible_values() - 1 # Generate random, non-decreasing thresholds - thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + thresholds = generate_random_threshold_values( + input_data_type, num_input_channels, num_steps + ) # make the vivado_hls threshold bug appear (incorrect rtlsim result when first # threshold of first channel is zero, while using BIPOLAR output) @@ -226,7 +254,13 @@ def test_fpgadataflow_thresholding_binary_search_cppsim(): # Generate model 
from input parameters to the test model = make_single_thresholding_binary_search_modelwrapper( - thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + mem_mode, + num_input_vecs, ) # Cppsim is not supported for this class, catch the specific exception thrown by cppsim @@ -236,9 +270,13 @@ def test_fpgadataflow_thresholding_binary_search_cppsim(): model = model.transform(CompileCppSim()) model = model.transform(SetExecMode("cppsim")) except Exception as e: - if str(e) != "Custom op_type Thresholding_Binary_Search is currently not supported.": + if ( + str(e) + != "Custom op_type Thresholding_Binary_Search is currently not supported." + ): raise + # Test brief: Prove that memory mode 'const' is not supported for this layer type @pytest.mark.tbs_const @pytest.mark.tbs_all @@ -256,11 +294,19 @@ def test_fpgadataflow_thresholding_binary_search_const_mem_mode(): # Generate random thresholds and sort in ascending order num_steps = activation.get_num_possible_values() - 1 - thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + thresholds = generate_random_threshold_values( + input_data_type, num_input_channels, num_steps + ) # Generate model from input parameters to the test model = make_single_thresholding_binary_search_modelwrapper( - thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + mem_mode, + num_input_vecs, ) # Prove that 'const' memory mode is not supported for this class @@ -278,6 +324,7 @@ def test_fpgadataflow_thresholding_binary_search_const_mem_mode(): # Caught the expected exception, leave the test early return + # Test brief: Test that PrepareRTLSim() runs successfully. 
This function is not # tested in test_fpgadataflow_thresholding_binary_search() @pytest.mark.tbs_prep_rtlsim @@ -294,7 +341,9 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): num_steps = act.get_num_possible_values() - 1 # Generate random, non-decreasing thresholds - thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + thresholds = generate_random_threshold_values( + input_data_type, num_input_channels, num_steps + ) # make the vivado_hls threshold bug appear (incorrect rtlsim result when first # threshold of first channel is zero, while using BIPOLAR output) if act == DataType["BIPOLAR"]: @@ -311,7 +360,13 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): # Generate model from input parameters to the test model = make_single_thresholding_binary_search_modelwrapper( - thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + mem_mode, + num_input_vecs, ) model = model.transform(SetExecMode("rtlsim")) @@ -321,18 +376,21 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): model = model.transform(PrepareRTLSim()) return + # Test brief: Create a Thresholding binary search layer using various parameters # and test against a SW generated & simulated dataset # N.B. - fold factor of '-1' is supported only (no PE/SIMD support) @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]]) @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) -@pytest.mark.parametrize("fold", [-1]) # 1, 2, etc. will fail +@pytest.mark.parametrize("fold", [-1]) # 1, 2, etc. 
will fail @pytest.mark.parametrize("num_input_channels", [16]) # no need to test 'const' mode, it's already done in test_fpgadataflow_thresholding_binary_search_const_mem_mode() @pytest.mark.parametrize("mem_mode", ["decoupled"]) @pytest.mark.tbs_soak @pytest.mark.tbs_all -def test_fpgadataflow_thresholding_binary_search(activation, input_data_type, fold, num_input_channels, mem_mode): +def test_fpgadataflow_thresholding_binary_search( + activation, input_data_type, fold, num_input_channels, mem_mode +): # Handle inputs to the test pe = generate_pe_value(fold, num_input_channels) num_steps = activation.get_num_possible_values() - 1 @@ -350,7 +408,9 @@ def test_fpgadataflow_thresholding_binary_search(activation, input_data_type, fo x = gen_finn_dt_tensor(input_data_type, tensor_shape) # Generate random thresholds and sort in ascending order - thresholds = generate_random_threshold_values(input_data_type, num_input_channels, num_steps) + thresholds = generate_random_threshold_values( + input_data_type, num_input_channels, num_steps + ) # make the vivado_hls threshold bug appear (incorrect rtlsim result when first # threshold of first channel is zero, while using BIPOLAR output) @@ -374,7 +434,13 @@ def test_fpgadataflow_thresholding_binary_search(activation, input_data_type, fo # Generate model from input parameters to the test model = make_single_thresholding_binary_search_modelwrapper( - thresholds, pe, input_data_type, output_data_type, activation_bias, mem_mode, num_input_vecs + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + mem_mode, + num_input_vecs, ) model = model.transform(InsertFIFO(True)) @@ -399,7 +465,9 @@ def config_hook(config): def write_thresh_config(sim): # axi_name = "s_axilite_0_" # works - axi_name = getCustomOp(model.get_nodes_by_op_type("Thresholding_Binary_Search")[0]).get_verilog_top_module_intf_names()['axilite'][0] + axi_name = getCustomOp( + model.get_nodes_by_op_type("Thresholding_Binary_Search")[0] + 
).get_verilog_top_module_intf_names()["axilite"][0] axi_name += "_0_" # 1. Write config registers to the Threshold memory, dict defines (addr, value) tuples @@ -409,6 +477,7 @@ def write_thresh_config(sim): axilite_write(sim, addr, val, basename=axi_name) reset_rtlsim(sim) + return write_thresh_config input_dict = {"inp": x} From 0689c6a6a03cbc2e9b3982af971144ac186a2c76 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 17 Nov 2022 10:30:50 +0000 Subject: [PATCH 033/111] [thresholding] add flake8 fixes Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 37 +++++++++------- ...fpgadataflow_thresholding_binary_search.py | 42 +++++++++++-------- 2 files changed, 48 insertions(+), 31 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index b785abcaa8..003dbb2fd9 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -28,7 +28,9 @@ import numpy as np import os +import textwrap import warnings +from math import ceil, log2 from qonnx.core.datatype import DataType from qonnx.util.basic import ( interleave_matrix_outer_dim_from_partitions, @@ -260,7 +262,8 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): decoupled_runtime} * weight_file_name : filename for the weight file to be generated """ - # There are 'decoupled_*' flavors, just make sure that the flavors are decoupled related + # There are 'decoupled_*' flavors, just make sure that the flavors + # are decoupled related if "decoupled" not in weight_file_mode: raise Exception( "Unrecognized memory mode for this node: {}".format(weight_file_mode) @@ -371,7 +374,8 @@ def prepare_codegen_rtl_values(self): self.conv_datatype_to_str(bias) ] # activation bias value - # Is the input datatype signed or unsigned? 
The thresholding core needs to know this + # Is the input datatype signed or unsigned? + # The thresholding core needs to know this when comparing weights to inputs if self.get_input_datatype().min() < 0: code_gen_dict["$SIGN$"] = ["signed"] else: @@ -421,7 +425,8 @@ def generate_hdl(self): file_only_path = rtl_file_path.split("/")[-1] self.dump_rtl_data(code_gen_dir, file_only_path, data) - # Before we return - set the 'gen_top_module' attribute for use later by PyVerilator and IPI generation + # Before we return - set the 'gen_top_module' attribute for use later + # by PyVerilator and IPI generation self.set_nodeattr("gen_top_module", code_gen_dict["$TOP_MODULE$"][0]) return @@ -508,14 +513,14 @@ def execute_node(self, context, graph): # Perform input checks if self.get_nodeattr("exec_mode") != "rtlsim": raise Exception( - "Invalid exec_mode value: {}; exec_mode must be set to 'rtlsim'".format( - self.get_nodeattr("exec_mode") + "Invalid exec_mode value: {}; exec_mode must be set to '{}'".format( + self.get_nodeattr("exec_mode"), "rtlsim" ) ) if self.get_nodeattr("mem_mode") != "decoupled": raise Exception( - "Invalid mem_mode value: {}; mem_mode must be set to 'decoupled'".format( - self.get_nodeattr("mem_mode") + "Invalid mem_mode value: {}; mem_mode must be set to '{}'".format( + self.get_nodeattr("mem_mode"), "decoupled" ) ) @@ -595,7 +600,8 @@ def execute_node(self, context, graph): return def code_generation_ipi(self): - """Constructs and returns the TCL commands for node instantiation as an RTL block.""" + """Constructs and returns the TCL commands for node instantiation as an RTL + block.""" cmd = [] rtl_file_list = self.get_rtl_file_list() code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") @@ -612,15 +618,19 @@ def code_generation_ipi(self): ) # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between - # /Thresholding_Binary_Search_0/s_axis(100000000 and /StreamingFIFO_0/out_V(200000000.000000) + # 
/Thresholding_Binary_Search_0/s_axis(100000000 and + # /StreamingFIFO_0/out_V(200000000.000000) cmd.append( - "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/s_axis]" + "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [%s %s]" + % ("get_bd_intf_pins", "Thresholding_Binary_Search_0/s_axis") ) # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between - # /StreamingFIFO_1/in0_V(200000000.000000) and /Thresholding_Binary_Search_0/m_axis(100000000) + # /StreamingFIFO_1/in0_V(200000000.000000) and + # /Thresholding_Binary_Search_0/m_axis(100000000) cmd.append( - "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [get_bd_intf_pins Thresholding_Binary_Search_0/m_axis]" + "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [%s %s]" + % ("get_bd_intf_pins", "Thresholding_Binary_Search_0/m_axis") ) return cmd @@ -654,7 +664,7 @@ def find_next_power_of_2(self, n): # Negative values will loop infinitely below - return 0 if n <= 0: return 0 - # If '1' is requested, output will be '0' in the loop below, so avoid this earlier. + # If '1' is requested, output will be '0' in the loop below, avoid this now. elif n == 1: return 2 # i.e. 2**1 @@ -674,7 +684,6 @@ def prep_axilite_val(self, val): return self.twos_comp(int(val), self.get_weight_datatype().bitwidth()) def get_dynamic_config(self, model, address_stride=1): - ## TODO - not sure this description is correct """Returns a configuration dictionary containing axilite write commands in order to program the thresholds into the RTL core during runtime. 
The default address stride for the weights is 1 byte.""" diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 579b6fe83c..81a089844d 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -51,6 +51,7 @@ test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 + # Helper functions def sort_thresholds_increasing(thresholds): return np.sort(thresholds, axis=1) @@ -140,16 +141,18 @@ def make_single_thresholding_binary_search_modelwrapper( return model -# Test brief: a particular method for this class was causing a bug - find_next_power_of_2() -# Weights in the thresholding core are programmed on a per-channel basis and are byte-addressable. -# When a channel is programmed, the next channel can start programming at the next power-of-2 byte boundary. -# This test is to show that the function that calculates that boundary is working correctly. +# Test brief: a particular method for this class was causing a bug: +# find_next_power_of_2() +# Weights in the thresholding core are programmed on a per-channel basis and are +# byte-addressable. When a channel is programmed, the next channel can start +# programming at the next power-of-2 byte boundary. This test is to show that the +# function that calculates that boundary is working correctly. # -# A Thresholding_Binary_Search layer was created and a SW generated dataset with a threshold channel -# depth of 1 weight (1 layer of N channels in the thresholding core). However, find_next_power_of_2() -# was returning a next-power-of-2 address boundary at address '0', instead of '2'. This unit test -# is to prove that this bug no longer occurs. It was originally seen when the input datatype -# was 'DataType["BIPOLAR"]'. 
+# A Thresholding_Binary_Search layer was created and a SW generated dataset with a +# threshold channel depth of 1 weight (1 layer of N channels in the thresholding core). +# However, find_next_power_of_2() was returning a next-power-of-2 address boundary at +# address '0', instead of '2'. This unit test is to prove that this bug no longer +# occurs. It was originally seen when the input datatype was 'DataType["BIPOLAR"]'. @pytest.mark.tbs_unit @pytest.mark.tbs_all def test_fpgadataflow_thresholding_binary_search_unit(): @@ -228,7 +231,9 @@ def test_fpgadataflow_thresholding_binary_search_cppsim(): act = DataType["BIPOLAR"] fold = -1 num_input_channels = 16 - mem_mode = "decoupled" # 'const' is unsupported - see test_fpgadataflow_thresholding_binary_search_const_mem_mode + # 'const' is unsupported see test: + # test_fpgadataflow_thresholding_binary_search_const_mem_mode() + mem_mode = "decoupled" pe = generate_pe_value(fold, num_input_channels) num_steps = act.get_num_possible_values() - 1 @@ -263,8 +268,9 @@ def test_fpgadataflow_thresholding_binary_search_cppsim(): num_input_vecs, ) - # Cppsim is not supported for this class, catch the specific exception thrown by cppsim - # Exception raised in cppsim: Custom op_type Thresholding_Binary_Search is currently not supported. + # Cppsim is not supported for this class, catch the specific exception thrown by + # cppsim. Exception raised in cppsim: Custom op_type Thresholding_Binary_Search is + # currently not supported. 
try: model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) @@ -310,8 +316,8 @@ def test_fpgadataflow_thresholding_binary_search_const_mem_mode(): ) # Prove that 'const' memory mode is not supported for this class - # 'const' memory mode is not supported for this class, catch the specific exception thrown by FINN - # Exception: ('Unrecognized memory mode for this node:', 'const') + # 'const' memory mode is not supported for this class, catch the specific exception + # thrown by FINN. Exception: ('Unrecognized memory mode for this node:', 'const') try: model = model.transform(InsertFIFO(True)) model = model.transform(GiveUniqueNodeNames()) @@ -384,7 +390,8 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) @pytest.mark.parametrize("fold", [-1]) # 1, 2, etc. will fail @pytest.mark.parametrize("num_input_channels", [16]) -# no need to test 'const' mode, it's already done in test_fpgadataflow_thresholding_binary_search_const_mem_mode() +# no need to test 'const' mode, it's already done in: +# test_fpgadataflow_thresholding_binary_search_const_mem_mode() @pytest.mark.parametrize("mem_mode", ["decoupled"]) @pytest.mark.tbs_soak @pytest.mark.tbs_all @@ -449,7 +456,7 @@ def test_fpgadataflow_thresholding_binary_search( model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) - # Retrieve the axilite programming sequence for the weights - for decoupled mode only + # Retrieve the axilite programming sequence for weights - for decoupled mode only tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0] tbs_inst = getCustomOp(tbs_node) config = tbs_inst.get_dynamic_config(model) @@ -470,7 +477,8 @@ def write_thresh_config(sim): ).get_verilog_top_module_intf_names()["axilite"][0] axi_name += "_0_" - # 1. 
Write config registers to the Threshold memory, dict defines (addr, value) tuples + # Write config registers to the Threshold memory. + # The dictionary defines (addr, value) tuples. for config_entry in config.values(): addr = config_entry[0] val = config_entry[1] From e9a4a7bb9dbdcc6dd2a7dd900f62851891793017 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 17 Nov 2022 12:01:52 +0000 Subject: [PATCH 034/111] [thresholding] change the pytest markers to omit tests from quicktest Signed-off-by: Fionn O'Donohoe --- ...fpgadataflow_thresholding_binary_search.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 81a089844d..e2189c4c79 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -153,8 +153,8 @@ def make_single_thresholding_binary_search_modelwrapper( # However, find_next_power_of_2() was returning a next-power-of-2 address boundary at # address '0', instead of '2'. This unit test is to prove that this bug no longer # occurs. It was originally seen when the input datatype was 'DataType["BIPOLAR"]'. 
-@pytest.mark.tbs_unit -@pytest.mark.tbs_all +@pytest.mark.fpgadataflow +@pytest.mark.vivado def test_fpgadataflow_thresholding_binary_search_unit(): activation = DataType["BIPOLAR"] input_data_type = DataType["INT16"] @@ -224,8 +224,8 @@ def test_fpgadataflow_thresholding_binary_search_unit(): # Test brief: Prove that cppsim is not supported for this class -@pytest.mark.tbs_cppsim -@pytest.mark.tbs_all +@pytest.mark.fpgadataflow +@pytest.mark.vivado def test_fpgadataflow_thresholding_binary_search_cppsim(): input_data_type = DataType["UINT16"] act = DataType["BIPOLAR"] @@ -284,8 +284,8 @@ def test_fpgadataflow_thresholding_binary_search_cppsim(): # Test brief: Prove that memory mode 'const' is not supported for this layer type -@pytest.mark.tbs_const -@pytest.mark.tbs_all +@pytest.mark.fpgadataflow +@pytest.mark.vivado def test_fpgadataflow_thresholding_binary_search_const_mem_mode(): input_data_type = DataType["INT16"] activation = DataType["INT4"] @@ -333,8 +333,8 @@ def test_fpgadataflow_thresholding_binary_search_const_mem_mode(): # Test brief: Test that PrepareRTLSim() runs successfully. 
This function is not # tested in test_fpgadataflow_thresholding_binary_search() -@pytest.mark.tbs_prep_rtlsim -@pytest.mark.tbs_all +@pytest.mark.fpgadataflow +@pytest.mark.vivado def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): input_data_type = DataType["INT16"] act = DataType["INT4"] @@ -393,8 +393,9 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): # no need to test 'const' mode, it's already done in: # test_fpgadataflow_thresholding_binary_search_const_mem_mode() @pytest.mark.parametrize("mem_mode", ["decoupled"]) -@pytest.mark.tbs_soak -@pytest.mark.tbs_all +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow def test_fpgadataflow_thresholding_binary_search( activation, input_data_type, fold, num_input_channels, mem_mode ): From 41c0b4b0799674cd468b9aabfe47a5992891e873 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 25 Nov 2022 14:57:39 +0000 Subject: [PATCH 035/111] [thresholding] update copyright banners of files I have added/changed Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/__init__.py | 2 +- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 +- .../test_fpgadataflow_thresholding_binary_search.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index dc9a5a349a..0e17726d48 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, Xilinx +# Copyright (C) 2022, Advanced Micro Devices, Inc. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 003dbb2fd9..7df755ae1b 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, Xilinx +# Copyright (C) 2022, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index e2189c4c79..1e3521a610 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, Xilinx +# Copyright (C) 2022, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without From 71ef39b38d70365f4812cfd6f0d46a1d0198b269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Thu, 1 Dec 2022 13:12:33 +0000 Subject: [PATCH 036/111] Translate byte to parameter word addressing in AXI adapter. --- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 97cdfd3e12..c766e60b9e 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -30,6 +30,12 @@ * * @brief All-AXI interface adapter for thresholding module. * @author Thomas B. 
Preußer + * + * @description + * This AXI adapter fits the core thresholding functionality: + * - with AXI stream data interfaces with flow control + * - with implicit round-robin channel rotation as used by FINN, and + * - performs aligned byte address to parameter word address translation. *****************************************************************************/ module $MODULE_NAME_AXI$ #( @@ -49,7 +55,7 @@ module $MODULE_NAME_AXI$ #( // Writing input logic s_axilite_AWVALID, output logic s_axilite_AWREADY, - input logic [$clog2(C)+N-1:0] s_axilite_AWADDR, + input logic [$clog2(C)+N+1:0] s_axilite_AWADDR, // lowest 2 bits (byte selectors) are ignored input logic s_axilite_WVALID, output logic s_axilite_WREADY, @@ -109,7 +115,7 @@ module $MODULE_NAME_AXI$ #( else begin if(!WABusy) begin WABusy <= s_axilite_AWVALID; - Addr <= s_axilite_AWADDR[$clog2(C)+N-1:0]; + Addr <= s_axilite_AWADDR[$clog2(C)+N+1:2]; end if(!WDBusy) begin WDBusy <= s_axilite_WVALID; From d44a66c949177163099e36ce4e57c9ac992ee70b Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 19 Dec 2022 15:05:08 +0000 Subject: [PATCH 037/111] [thresholding] remove unused attribute Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 7df755ae1b..2ebe6f0a39 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -103,7 +103,6 @@ def get_nodeattr_types(self): # always "flush" the accelerator by first passing a dummy input # vector through the accelerator. This will get rid of any old # weight data from the weight FIFOs. 
- "runtime_writeable_weights": ("i", False, 0, {0, 1}), "gen_top_module": ("s", False, ""), "activation_bias": ("i", False, 0), } @@ -656,8 +655,6 @@ def get_verilog_top_module_intf_names(self): intf_names["s_axis"] = [["s_axis"]] intf_names["m_axis"] = [["m_axis"]] - self.set_nodeattr("runtime_writeable_weights", 1) - return intf_names def find_next_power_of_2(self, n): From f79b9ec3e19d83d6469e6e563422fbba70f7a87a Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 19 Dec 2022 15:53:20 +0000 Subject: [PATCH 038/111] [thresholding] remove unnecessary HLS bug prevention check Signed-off-by: Fionn O'Donohoe --- ...test_fpgadataflow_thresholding_binary_search.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 1e3521a610..ab98189ea5 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -242,11 +242,6 @@ def test_fpgadataflow_thresholding_binary_search_cppsim(): thresholds = generate_random_threshold_values( input_data_type, num_input_channels, num_steps ) - - # make the vivado_hls threshold bug appear (incorrect rtlsim result when first - # threshold of first channel is zero, while using BIPOLAR output) - if act == DataType["BIPOLAR"]: - thresholds[0][0] = 0 thresholds = sort_thresholds_increasing(thresholds) # Other non-input parameters @@ -350,10 +345,6 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): thresholds = generate_random_threshold_values( input_data_type, num_input_channels, num_steps ) - # make the vivado_hls threshold bug appear (incorrect rtlsim result when first - # threshold of first channel is zero, while using BIPOLAR output) - if act == DataType["BIPOLAR"]: - thresholds[0][0] = 0 thresholds = sort_thresholds_increasing(thresholds) # Other non-input parameters @@ 
-420,11 +411,6 @@ def test_fpgadataflow_thresholding_binary_search( input_data_type, num_input_channels, num_steps ) - # make the vivado_hls threshold bug appear (incorrect rtlsim result when first - # threshold of first channel is zero, while using BIPOLAR output) - if activation == DataType["BIPOLAR"]: - thresholds[0][0] = 0 - # provide non-decreasing/ascending thresholds thresholds = sort_thresholds_increasing(thresholds) From 7b82de2c78e14f9dc2017e7c5e9378865011e9da Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 19 Dec 2022 16:40:57 +0000 Subject: [PATCH 039/111] [thresholding] align methods with hlscustom class by adding in additional input parameter Signed-off-by: Fionn O'Donohoe --- .../fpgadataflow/thresholding_binary_search.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 2ebe6f0a39..d69c7e47b7 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -129,10 +129,10 @@ def bram_estimation(self): def lut_estimation(self): return 0 - def get_input_datatype(self): + def get_input_datatype(self, ind=0): return DataType[self.get_nodeattr("inputDataType")] - def get_output_datatype(self): + def get_output_datatype(self, ind=0): return DataType[self.get_nodeattr("outputDataType")] def get_weight_datatype(self): @@ -142,11 +142,11 @@ def get_weight_datatype(self): def minimize_accumulator_width(self, model): return None - def get_instream_width(self): + def get_instream_width(self, ind=0): i_bits = self.get_input_datatype().bitwidth() return i_bits * self.get_nodeattr("PE") - def get_outstream_width(self): + def get_outstream_width(self, ind=0): o_bits = self.get_output_datatype().bitwidth() return o_bits * self.get_nodeattr("PE") @@ -163,24 +163,24 @@ def get_weightstream_width(self): w_width = pe * 
wp * n_thres_steps return w_width - def get_folded_input_shape(self): + def get_folded_input_shape(self, ind=0): fold = self.calc_tmem() pe = self.get_nodeattr("PE") vecs = list(self.get_nodeattr("numInputVectors")) folded_input_shape = tuple(vecs + [fold, pe]) return folded_input_shape - def get_folded_output_shape(self): + def get_folded_output_shape(self, ind=0): # same shape as input return self.get_folded_input_shape() - def get_normal_input_shape(self): + def get_normal_input_shape(self, ind=0): num_channels = self.get_nodeattr("NumChannels") vecs = list(self.get_nodeattr("numInputVectors")) normal_input_shape = tuple(vecs + [num_channels]) return normal_input_shape - def get_normal_output_shape(self): + def get_normal_output_shape(self, ind=0): # same shape as input return self.get_normal_input_shape() From e2816d3e1c8ce75ad9f0b1aafbef25af8b305a6c Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 19 Dec 2022 16:50:26 +0000 Subject: [PATCH 040/111] [thresholding] replace hardcoded tcl commands with node attributes Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index d69c7e47b7..fe976c7dbe 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -105,6 +105,7 @@ def get_nodeattr_types(self): # weight data from the weight FIFOs. 
"gen_top_module": ("s", False, ""), "activation_bias": ("i", False, 0), + "clkFreq": ("i", False, 200000000), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -604,6 +605,10 @@ def code_generation_ipi(self): cmd = [] rtl_file_list = self.get_rtl_file_list() code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + node_name = self.onnx_node.name + dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0][0] + din_name = self.get_verilog_top_module_intf_names()["s_axis"][0][0] + clock_freq = self.get_nodeattr("clkFreq") for rtl_file in rtl_file_list: cmd.append( @@ -616,20 +621,14 @@ def code_generation_ipi(self): % (self.get_nodeattr("gen_top_module"), self.onnx_node.name) ) - # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between - # /Thresholding_Binary_Search_0/s_axis(100000000 and - # /StreamingFIFO_0/out_V(200000000.000000) cmd.append( - "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [%s %s]" - % ("get_bd_intf_pins", "Thresholding_Binary_Search_0/s_axis") + "set_property -dict [list CONFIG.FREQ_HZ {%d}] [%s %s/%s]" + % (clock_freq, "get_bd_intf_pins", node_name, din_name) ) - # ERROR: [BD 41-237] Bus Interface property FREQ_HZ does not match between - # /StreamingFIFO_1/in0_V(200000000.000000) and - # /Thresholding_Binary_Search_0/m_axis(100000000) cmd.append( - "set_property -dict [list CONFIG.FREQ_HZ {200000000}] [%s %s]" - % ("get_bd_intf_pins", "Thresholding_Binary_Search_0/m_axis") + "set_property -dict [list CONFIG.FREQ_HZ {%d}] [%s %s/%s]" + % (clock_freq, "get_bd_intf_pins", node_name, dout_name) ) return cmd From bda05ae16e62627d414c80452caa012dee7aa0d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 20 Dec 2022 09:24:04 +0000 Subject: [PATCH 041/111] Fix BIAS parameter specification. 
--- finn-rtllib/thresholding/hdl/thresholding.sv | 2 +- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 25d6ff3112..b26747d1ff 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -48,7 +48,7 @@ module $MODULE_NAME$ #( int unsigned M, // input/threshold precision int unsigned C, // number of channels - int BIAS, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) + int BIAS, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) int unsigned C_BITS, int unsigned O_BITS diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index c766e60b9e..5cd7746b82 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -43,7 +43,7 @@ module $MODULE_NAME_AXI$ #( int unsigned M, // input/threshold precision int unsigned C, // Channels - int BIAS, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) + int BIAS, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) int unsigned O_BITS )( From 7388e7613ef38b6caa1fafb1129973cefef8716a Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 20 Dec 2022 14:08:07 +0000 Subject: [PATCH 042/111] [thresholding] remove unused ram_style attribute Signed-off-by: Fionn O'Donohoe --- .../fpgadataflow/thresholding_binary_search.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index fe976c7dbe..9cbe049be3 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -76,8 +76,6 @@ def get_nodeattr_types(self): "NumChannels": ("i", True, 
0), # number of steps in thresholding function. Used only in decoupled mode "numSteps": ("i", True, 1), - # string defining memory type - "ram_style": ("s", False, "distributed", {"distributed", "block"}), # FINN DataTypes for inputs, outputs "inputDataType": ("s", True, ""), "weightDataType": ("s", True, ""), @@ -470,14 +468,7 @@ def generate_params(self, model, path): ) # Synthesis thresholds: - ram_style = self.get_nodeattr("ram_style") - if ram_style == "ultra": - # UltraRAM must have no memory initializer, or only zeroes - # otherwise BRAM will be inferred instead of URAM - # as a workaround we provide a zero-weight init here - synth_thresholds = np.zeros_like(thresholds, dtype=np.float32) - else: - synth_thresholds = thresholds + synth_thresholds = thresholds self.make_weight_file( synth_thresholds, "decoupled_verilog_dat", weight_filename_rtl_synth ) From e965396e4ddf4848fc9a17b04fa4908a0924568e Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 10:40:52 +0000 Subject: [PATCH 043/111] [thresholding] skip test for unsupported cppsim configuration and merge tests Signed-off-by: Fionn O'Donohoe --- ...fpgadataflow_thresholding_binary_search.py | 65 +++---------------- 1 file changed, 9 insertions(+), 56 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index ab98189ea5..947109794e 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -96,6 +96,7 @@ def make_single_thresholding_binary_search_modelwrapper( mem_mode, num_input_vecs, ): + NumChannels = thresholds.shape[0] inp = helper.make_tensor_value_info( @@ -223,61 +224,6 @@ def test_fpgadataflow_thresholding_binary_search_unit(): return -# Test brief: Prove that cppsim is not supported for this class -@pytest.mark.fpgadataflow -@pytest.mark.vivado -def 
test_fpgadataflow_thresholding_binary_search_cppsim(): - input_data_type = DataType["UINT16"] - act = DataType["BIPOLAR"] - fold = -1 - num_input_channels = 16 - # 'const' is unsupported see test: - # test_fpgadataflow_thresholding_binary_search_const_mem_mode() - mem_mode = "decoupled" - - pe = generate_pe_value(fold, num_input_channels) - num_steps = act.get_num_possible_values() - 1 - - # Generate random, non-decreasing thresholds - thresholds = generate_random_threshold_values( - input_data_type, num_input_channels, num_steps - ) - thresholds = sort_thresholds_increasing(thresholds) - - # Other non-input parameters - num_input_vecs = [1, 2, 2] - output_data_type = act - if output_data_type == DataType["BIPOLAR"]: - activation_bias = 0 - else: - activation_bias = output_data_type.min() - - # Generate model from input parameters to the test - model = make_single_thresholding_binary_search_modelwrapper( - thresholds, - pe, - input_data_type, - output_data_type, - activation_bias, - mem_mode, - num_input_vecs, - ) - - # Cppsim is not supported for this class, catch the specific exception thrown by - # cppsim. Exception raised in cppsim: Custom op_type Thresholding_Binary_Search is - # currently not supported. - try: - model = model.transform(PrepareCppSim()) - model = model.transform(CompileCppSim()) - model = model.transform(SetExecMode("cppsim")) - except Exception as e: - if ( - str(e) - != "Custom op_type Thresholding_Binary_Search is currently not supported." 
- ): - raise - - # Test brief: Prove that memory mode 'const' is not supported for this layer type @pytest.mark.fpgadataflow @pytest.mark.vivado @@ -384,16 +330,23 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): # no need to test 'const' mode, it's already done in: # test_fpgadataflow_thresholding_binary_search_const_mem_mode() @pytest.mark.parametrize("mem_mode", ["decoupled"]) +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_fpgadataflow_thresholding_binary_search( - activation, input_data_type, fold, num_input_channels, mem_mode + activation, input_data_type, fold, num_input_channels, mem_mode, exec_mode ): # Handle inputs to the test pe = generate_pe_value(fold, num_input_channels) num_steps = activation.get_num_possible_values() - 1 + # Cppsim is not supported for this node (as it is an RTL node) + if exec_mode == "cppsim": + pytest.skip("cppsim not supported for RTL Thresholding Binary Search node") + elif exec_mode != "rtlsim": + raise Exception("Unknown exec_mode: {}".format(exec_mode)) + # Other non-input parameters num_input_vecs = [1, 2, 2] output_data_type = activation From 2b8a674573e3415e54665ff05a2db75d5c20f30f Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 11:07:40 +0000 Subject: [PATCH 044/111] [thresholding] moving find_next_power_of_2() to the util suite Signed-off-by: Fionn O'Donohoe --- src/finn/util/basic.py | 17 ++++ ...fpgadataflow_thresholding_binary_search.py | 82 ------------------- tests/util/test_basic.py | 62 ++++++++++++++ 3 files changed, 79 insertions(+), 82 deletions(-) create mode 100755 tests/util/test_basic.py diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 4aba87216c..9a66cf90eb 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -216,3 +216,20 @@ def is_exe(fpath): return exe_file return None + +def find_next_power_of_2(n): + # Negative values will loop infinitely 
below - return 0 + if n <= 0: + return 0 + # If '1' is requested, output will be '0' in the loop below, avoid this now. + elif n == 1: + return 2 # i.e. 2**1 + + # decrement 'n' (to handle cases when `n` itself is a power of 2) + n = n - 1 + + # loop until only one bit is left + while n & n - 1: + # unset rightmost bit + n = n & n - 1 + return n << 1 diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 947109794e..29fc2828b6 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -142,88 +142,6 @@ def make_single_thresholding_binary_search_modelwrapper( return model -# Test brief: a particular method for this class was causing a bug: -# find_next_power_of_2() -# Weights in the thresholding core are programmed on a per-channel basis and are -# byte-addressable. When a channel is programmed, the next channel can start -# programming at the next power-of-2 byte boundary. This test is to show that the -# function that calculates that boundary is working correctly. -# -# A Thresholding_Binary_Search layer was created and a SW generated dataset with a -# threshold channel depth of 1 weight (1 layer of N channels in the thresholding core). -# However, find_next_power_of_2() was returning a next-power-of-2 address boundary at -# address '0', instead of '2'. This unit test is to prove that this bug no longer -# occurs. It was originally seen when the input datatype was 'DataType["BIPOLAR"]'. 
-@pytest.mark.fpgadataflow -@pytest.mark.vivado -def test_fpgadataflow_thresholding_binary_search_unit(): - activation = DataType["BIPOLAR"] - input_data_type = DataType["INT16"] - fold = -1 - num_input_channels = 16 - mem_mode = "decoupled" - - # Handle inputs to the test - pe = generate_pe_value(fold, num_input_channels) - num_steps = activation.get_num_possible_values() - 1 - - # Other non-input parameters - num_input_vecs = [1, 2, 2] - output_data_type = activation - if output_data_type == DataType["BIPOLAR"]: - activation_bias = 0 - else: - activation_bias = output_data_type.min() - - # Generate random thresholds and sort in ascending order - thresholds = generate_random_threshold_values( - input_data_type, num_input_channels, num_steps - ) - - # Generate model from input parameters to the test - model = make_single_thresholding_binary_search_modelwrapper( - thresholds, - pe, - input_data_type, - output_data_type, - activation_bias, - mem_mode, - num_input_vecs, - ) - - # Retrieve the class to get the method-under-test - tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0] - tbs_inst = getCustomOp(tbs_node) - - test_vector = [ - {"input": -2, "expected_result": 0}, - {"input": -1, "expected_result": 0}, - {"input": 0, "expected_result": 0}, - {"input": 1, "expected_result": 2}, - {"input": 2, "expected_result": 2}, - {"input": 3, "expected_result": 4}, - {"input": 4, "expected_result": 4}, - {"input": 7, "expected_result": 8}, - {"input": 8, "expected_result": 8}, - {"input": 11, "expected_result": 16}, - {"input": 15, "expected_result": 16}, - {"input": 16, "expected_result": 16}, - {"input": 18, "expected_result": 32}, - {"input": 27, "expected_result": 32}, - {"input": 31, "expected_result": 32}, - {"input": 32, "expected_result": 32}, - {"input": 42, "expected_result": 64}, - {"input": 65, "expected_result": 128}, - ] - - for test_dict in test_vector: - output = tbs_inst.find_next_power_of_2(test_dict["input"]) - assert output >= 
test_dict["input"] - assert output == test_dict["expected_result"] - - return - - # Test brief: Prove that memory mode 'const' is not supported for this layer type @pytest.mark.fpgadataflow @pytest.mark.vivado diff --git a/tests/util/test_basic.py b/tests/util/test_basic.py new file mode 100755 index 0000000000..d2586f4f19 --- /dev/null +++ b/tests/util/test_basic.py @@ -0,0 +1,62 @@ +# Copyright (C) 2023, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pytest + +import finn.util.basic as basic + + +@pytest.mark.util +def test_next_power_of_2(): + test_vector = [ + {"input": -2, "expected_result": 0}, + {"input": -1, "expected_result": 0}, + {"input": 0, "expected_result": 0}, + {"input": 1, "expected_result": 2}, + {"input": 2, "expected_result": 2}, + {"input": 3, "expected_result": 4}, + {"input": 4, "expected_result": 4}, + {"input": 7, "expected_result": 8}, + {"input": 8, "expected_result": 8}, + {"input": 11, "expected_result": 16}, + {"input": 15, "expected_result": 16}, + {"input": 16, "expected_result": 16}, + {"input": 18, "expected_result": 32}, + {"input": 27, "expected_result": 32}, + {"input": 31, "expected_result": 32}, + {"input": 32, "expected_result": 32}, + {"input": 42, "expected_result": 64}, + {"input": 65, "expected_result": 128}, + ] + + for test_dict in test_vector: + output = basic.find_next_power_of_2(test_dict["input"]) + assert output >= test_dict["input"] + assert output == test_dict["expected_result"] + + return From 45bb19f2821bde10cf7303a193869160fd46c72e Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 11:22:08 +0000 Subject: [PATCH 045/111] [thresholding] remove find_next_power_of_2() from thresholding binary search CustomOp class Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 21 ++----------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 9cbe049be3..c681bb2631 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -38,7 +38,7 @@ ) from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp -from finn.util.basic import get_rtlsim_trace_depth, make_build_dir +from finn.util.basic import get_rtlsim_trace_depth, make_build_dir, find_next_power_of_2 from finn.util.data_packing import ( 
npy_to_rtlsim_input, pack_innermost_dim_as_hex_string, @@ -647,23 +647,6 @@ def get_verilog_top_module_intf_names(self): return intf_names - def find_next_power_of_2(self, n): - # Negative values will loop infinitely below - return 0 - if n <= 0: - return 0 - # If '1' is requested, output will be '0' in the loop below, avoid this now. - elif n == 1: - return 2 # i.e. 2**1 - - # decrement 'n' (to handle cases when `n` itself is a power of 2) - n = n - 1 - - # loop until only one bit is left - while n & n - 1: - # unset rightmost bit - n = n & n - 1 - return n << 1 - def twos_comp(self, val, bitwidth): return (val + (1 << bitwidth)) % (1 << bitwidth) @@ -678,7 +661,7 @@ def get_dynamic_config(self, model, address_stride=1): thresholds = model.get_initializer(self.onnx_node.input[1]) num_channels, num_weights_per_channel = thresholds.shape - weight_addr_boundary = self.find_next_power_of_2(num_weights_per_channel) + weight_addr_boundary = find_next_power_of_2(num_weights_per_channel) # Make sure that the next power of 2 (output) is greater than the input assert weight_addr_boundary >= num_weights_per_channel From ca0042225c006d4545e26b0e0f1221ecd4ab68c3 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 12:58:01 +0000 Subject: [PATCH 046/111] [thresholding] replace math functions with existing functions Signed-off-by: Fionn O'Donohoe --- .../custom_op/fpgadataflow/thresholding_binary_search.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index c681bb2631..9113e4f9d9 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -647,12 +647,6 @@ def get_verilog_top_module_intf_names(self): return intf_names - def twos_comp(self, val, bitwidth): - return (val + (1 << bitwidth)) % (1 << bitwidth) - - def 
prep_axilite_val(self, val): - return self.twos_comp(int(val), self.get_weight_datatype().bitwidth()) - def get_dynamic_config(self, model, address_stride=1): """Returns a configuration dictionary containing axilite write commands in order to program the thresholds into the RTL core during runtime. @@ -677,7 +671,7 @@ def get_dynamic_config(self, model, address_stride=1): ) config[key_name] = ( channel_start_addr + addr, - self.prep_axilite_val(weight), + int(str(pack_innermost_dim_as_hex_string([weight], self.get_weight_datatype(), self.get_weight_datatype().bitwidth())), 0), ) weight_cntr += 1 From 7f3455fc0d1dafedaf8cdfca8144dea41747a624 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 13:16:11 +0000 Subject: [PATCH 047/111] [thresholding] remove concept of mem_mode for RTL thresholding binary search node Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 38 ------------ ...fpgadataflow_thresholding_binary_search.py | 58 +------------------ 2 files changed, 1 insertion(+), 95 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 9113e4f9d9..954850562e 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -88,19 +88,6 @@ def get_nodeattr_types(self): # [4] is four vectors (like a FC layer with batch=4) # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) "numInputVectors": ("ints", False, [1]), - # memory mode for the thresholds - # const -- embedded thresholds, default - # decoupled -- streaming thresholds with streamer packaged inside IP - "mem_mode": ("s", False, "const", {"const", "decoupled"}), - # (mem_mode = decoupled only) whether weights (thresholds) will be - # writable through an AXI-lite interface during runtime - # 1 for enabled, 0 for disabled. 
- # see finn-rtllib/memstream/doc/README for more about the memory - # address map used for writable weights - # IMPORTANT: After using AXI lite to either read or write the weights, - # always "flush" the accelerator by first passing a dummy input - # vector through the accelerator. This will get rid of any old - # weight data from the weight FIFOs. "gen_top_module": ("s", False, ""), "activation_bias": ("i", False, 0), "clkFreq": ("i", False, 200000000), @@ -150,12 +137,6 @@ def get_outstream_width(self, ind=0): return o_bits * self.get_nodeattr("PE") def get_weightstream_width(self): - # Only 'decoupled' mode is supported - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode != "decoupled": - raise Exception( - "Unrecognized memory mode for this node: {}".format(mem_mode) - ) pe = self.get_nodeattr("PE") wp = self.get_weight_datatype().bitwidth() n_thres_steps = self.get_nodeattr("numSteps") @@ -442,13 +423,6 @@ def code_generation_ipgen(self, model, fpgapart, clk): self.generate_params(model, code_gen_dir) def generate_params(self, model, path): - # Only 'decoupled' mode is supported - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode != "decoupled": - raise Exception( - "Unrecognized memory mode for this node: {}".format(mem_mode) - ) - code_gen_dir = path weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir) thresholds = model.get_initializer(self.onnx_node.input[1]) @@ -508,12 +482,6 @@ def execute_node(self, context, graph): self.get_nodeattr("exec_mode"), "rtlsim" ) ) - if self.get_nodeattr("mem_mode") != "decoupled": - raise Exception( - "Invalid mem_mode value: {}; mem_mode must be set to '{}'".format( - self.get_nodeattr("mem_mode"), "decoupled" - ) - ) node = self.onnx_node code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") @@ -635,12 +603,6 @@ def get_verilog_top_module_intf_names(self): Each block must have at most one aximm and one axilite.""" intf_names = super().get_verilog_top_module_intf_names() - # Only 'decoupled' mode is 
supported - check before adding axilite interface - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode != "decoupled": - raise Exception( - "Unrecognized memory mode for this node: {}".format(mem_mode) - ) intf_names["axilite"] = ["s_axilite"] intf_names["s_axis"] = [["s_axis"]] intf_names["m_axis"] = [["m_axis"]] diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 29fc2828b6..7ef5da8f23 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -93,7 +93,6 @@ def make_single_thresholding_binary_search_modelwrapper( input_data_type, output_data_type, activation_bias, - mem_mode, num_input_vecs, ): @@ -121,7 +120,6 @@ def make_single_thresholding_binary_search_modelwrapper( weightDataType=input_data_type.name, outputDataType=output_data_type.name, activation_bias=activation_bias, - mem_mode=mem_mode, numInputVectors=num_input_vecs, ) graph = helper.make_graph( @@ -142,54 +140,6 @@ def make_single_thresholding_binary_search_modelwrapper( return model -# Test brief: Prove that memory mode 'const' is not supported for this layer type -@pytest.mark.fpgadataflow -@pytest.mark.vivado -def test_fpgadataflow_thresholding_binary_search_const_mem_mode(): - input_data_type = DataType["INT16"] - activation = DataType["INT4"] - fold = -1 - num_input_channels = 16 - mem_mode = "const" - - pe = generate_pe_value(fold, num_input_channels) - num_input_vecs = [1, 2, 2] - output_data_type = activation - activation_bias = output_data_type.min() - - # Generate random thresholds and sort in ascending order - num_steps = activation.get_num_possible_values() - 1 - thresholds = generate_random_threshold_values( - input_data_type, num_input_channels, num_steps - ) - - # Generate model from input parameters to the test - model = make_single_thresholding_binary_search_modelwrapper( - 
thresholds, - pe, - input_data_type, - output_data_type, - activation_bias, - mem_mode, - num_input_vecs, - ) - - # Prove that 'const' memory mode is not supported for this class - # 'const' memory mode is not supported for this class, catch the specific exception - # thrown by FINN. Exception: ('Unrecognized memory mode for this node:', 'const') - try: - model = model.transform(InsertFIFO(True)) - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) - model = model.transform(HLSSynthIP()) - model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) - except Exception as e: - if str(e) != "Unrecognized memory mode for this node: {}".format(mem_mode): - raise - # Caught the expected exception, leave the test early - return - - # Test brief: Test that PrepareRTLSim() runs successfully. This function is not # tested in test_fpgadataflow_thresholding_binary_search() @pytest.mark.fpgadataflow @@ -199,7 +149,6 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): act = DataType["INT4"] fold = -1 num_input_channels = 16 - mem_mode = "decoupled" # Handle inputs to the test pe = generate_pe_value(fold, num_input_channels) @@ -226,7 +175,6 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): input_data_type, output_data_type, activation_bias, - mem_mode, num_input_vecs, ) @@ -245,15 +193,12 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) @pytest.mark.parametrize("fold", [-1]) # 1, 2, etc. 
will fail @pytest.mark.parametrize("num_input_channels", [16]) -# no need to test 'const' mode, it's already done in: -# test_fpgadataflow_thresholding_binary_search_const_mem_mode() -@pytest.mark.parametrize("mem_mode", ["decoupled"]) @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_fpgadataflow_thresholding_binary_search( - activation, input_data_type, fold, num_input_channels, mem_mode, exec_mode + activation, input_data_type, fold, num_input_channels, exec_mode ): # Handle inputs to the test pe = generate_pe_value(fold, num_input_channels) @@ -304,7 +249,6 @@ def test_fpgadataflow_thresholding_binary_search( input_data_type, output_data_type, activation_bias, - mem_mode, num_input_vecs, ) From 4bc69f1a374821b16b80826946223a0a36cae787 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 16:20:02 +0000 Subject: [PATCH 048/111] [thresholding] add methods needed for convertingToHls transformation Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 40 +++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 954850562e..c342d235d9 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -101,10 +101,23 @@ def calc_tmem(self): return num_channels // pe def make_shape_compatible_op(self, model): - return [] + oshape = self.get_normal_output_shape() + return super().make_const_shape_op(oshape) def infer_node_datatype(self, model): - return + node = self.onnx_node + idt = model.get_tensor_datatype(node.input[0]) + if idt != self.get_input_datatype(): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + node.name, + str(self.get_input_datatype().name), + str(idt.name), + ) + warnings.warn(warn_str) + 
self.set_nodeattr("inputDataType", idt.name) + # set output datatype from property + odt = self.get_output_datatype() + model.set_tensor_datatype(node.output[0], odt) def verify_node(self): return [] @@ -126,7 +139,28 @@ def get_weight_datatype(self): return DataType[self.get_nodeattr("weightDataType")] def minimize_accumulator_width(self, model): - return None + "Minimize threshold width ('accumulator width' here due to convention)" + thresholds = model.get_initializer(self.onnx_node.input[1]) + threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds) + min_threshold = thresholds.min() + max_threshold = thresholds.max() + min_input = self.get_input_datatype().min() + max_input = self.get_input_datatype().max() + # get range required by threshold values + tdt_min = min(min_input, min_threshold) + tdt_max = max(max_input, max_threshold) + if tdt_min < 0: + if abs(tdt_min) > tdt_max: + tdt = DataType.get_smallest_possible(tdt_min) + else: + tdt = DataType.get_smallest_possible(-tdt_max - 1) + else: + tdt = DataType.get_smallest_possible(tdt_max) + assert np.vectorize(tdt.allowed)( + threshold_tensor + ).all(), "Thresholds can't be expressed with type %s" % str(tdt) + self.set_nodeattr("weightDataType", tdt.name) + return DataType[self.get_nodeattr("weightDataType")] def get_instream_width(self, ind=0): i_bits = self.get_input_datatype().bitwidth() From 3b6a1980b8ac28f5a809125d1e06eeb5ab2ba3b5 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 16:32:23 +0000 Subject: [PATCH 049/111] [thresholding] add convertingToHls transformation for thresholding binary search RTL node Signed-off-by: Fionn O'Donohoe --- .../fpgadataflow/convert_to_hls_layers.py | 93 +++++++++++++++---- 1 file changed, 73 insertions(+), 20 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index 525af7ea92..17f839c5c5 100644 --- 
a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -1062,9 +1062,10 @@ def apply(self, model): class InferThresholdingLayer(Transformation): """Convert any MultiThreshold into a standalone thresholding HLS layer.""" - def __init__(self, mem_mode="const"): + def __init__(self, mem_mode="const", use_rtl_variant=False): super().__init__() self.mem_mode = mem_mode + self.use_rtl_variant = use_rtl_variant def apply(self, model): graph = model.graph @@ -1118,26 +1119,78 @@ def apply(self, model): ) actval = int(actval) assert (not odt.signed()) or (actval < 0), ( - node.name + ": Signed output requres actval < 0" - ) - # create and insert new Thresholding_Batch node - new_node = helper.make_node( - "Thresholding_Batch", - [thl_input, thl_threshold], - [thl_output], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - NumChannels=ifc, - PE=pe, - numSteps=thl_thres_shape[1], - inputDataType=idt.name, - weightDataType=idt.name, # will be set by MinimizeAccumulatorWidth - outputDataType=odt.name, - numInputVectors=list(thl_in_shape[:-1]), - ActVal=actval, - mem_mode=self.mem_mode, - name="Thresholding_Batch_" + node.name, + node.name + ": Signed output requires actval < 0" ) + + # Ensure that RTL variant is not inserted for unsupported configuration + is_rtl_variant_compatible = True + + # Perform checks for RTL variant if chosen + if self.use_rtl_variant: + # Check memory mode + if self.mem_mode != "decoupled": + warnings.warn( + """%s : RTL Thresholding does not support 'decoupled' memory mode. + Falling back to HLS implementation.""" + % node.name + ) + is_rtl_variant_compatible = False + + # Check PE/SIMD value + if pe != 1: + warnings.warn( + """%s : RTL Thresholding does not support paralellisation. + Only a PE value of 1 is supported. 
+ Falling back to HLS implementation.""" + % node.name + ) + is_rtl_variant_compatible = False + + if self.use_rtl_variant and is_rtl_variant_compatible: + new_node = helper.make_node( + "Thresholding_Binary_Search", + [thl_input, thl_threshold], + [thl_output], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + NumChannels=ifc, + PE=pe, + numSteps=thl_thres_shape[1], + inputDataType=idt.name, + weightDataType=idt.name, # will be set by MinimizeAccumulatorWidth + outputDataType=odt.name, + numInputVectors=list(thl_in_shape[:-1]), + activation_bias=actval, + mem_mode=self.mem_mode, + name="Thresholding_Binary_Search_" + node.name, + ) + else: + if self.use_rtl_variant: + warnings.warn( + """%s : RTL Thresholding requested for unsupported + configuration. Falling back to HLS implementation.""" + % node.name + ) + + # create and insert new Thresholding_Batch node + new_node = helper.make_node( + "Thresholding_Batch", + [thl_input, thl_threshold], + [thl_output], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + NumChannels=ifc, + PE=pe, + numSteps=thl_thres_shape[1], + inputDataType=idt.name, + weightDataType=idt.name, # will be set by MinimizeAccumulatorWidth + outputDataType=odt.name, + numInputVectors=list(thl_in_shape[:-1]), + ActVal=actval, + mem_mode=self.mem_mode, + name="Thresholding_Batch_" + node.name, + ) + graph.node.insert(insert_point, new_node) # remove old node graph.node.remove(node) From b3800cd7e258cecb0466cb9238eeb37ff738d660 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 16:34:09 +0000 Subject: [PATCH 050/111] [thresholding] add test for convertingToHls transformation for thresholding binary search node Signed-off-by: Fionn O'Donohoe --- .../test_convert_to_hls_thresholding.py | 322 ++++++++++++++++++ 1 file changed, 322 insertions(+) create mode 100755 tests/fpgadataflow/test_convert_to_hls_thresholding.py diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py 
b/tests/fpgadataflow/test_convert_to_hls_thresholding.py new file mode 100755 index 0000000000..30932638b6 --- /dev/null +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -0,0 +1,322 @@ +# Copyright (C) 2023, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pytest + +import numpy as np +from onnx import TensorProto, helper +from pyverilator.util.axi_utils import axilite_write, reset_rtlsim +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.custom_op.general.multithreshold import multithreshold +from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.general import GiveUniqueNodeNames +from qonnx.transformation.infer_datatypes import InferDataTypes +from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import gen_finn_dt_tensor + +from finn.core.rtlsim_exec import rtlsim_exec +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode + +test_fpga_part = "xczu3eg-sbva484-1-e" +target_clk_ns = 5 + + +# Helper functions +def sort_thresholds_increasing(thresholds): + return np.sort(thresholds, axis=1) + + +def generate_random_threshold_values(input_data_type, num_input_channels, num_steps): + return np.random.randint( + input_data_type.min(), + input_data_type.max() + 1, + (num_input_channels, num_steps), + ).astype(np.float32) + + +def generate_pe_value(fold, num_input_channels): + if fold == -1: + fold = num_input_channels + pe = num_input_channels // fold + assert num_input_channels % pe == 0 + return pe + + +# n = batch, c = channel, h = height, w = width of feature map +# Standard = NCHW; FINN = NHWC +# Convert from NCHW to 
NHWC +def convert_np_array_to_finn_data_layout(data): + return np.transpose(data, (0, 2, 3, 1)) + + +# n = batch, c = channel, h = height, w = width of feature map +# Standard = NCHW; FINN = NHWC +# Convert from NHWC to NCHW +def convert_np_array_to_standard_data_layout(data): + return np.transpose(data, (0, 3, 1, 2)) + + +def make_single_thresholding_binary_search_modelwrapper( + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + num_input_vecs, +): + NumChannels = thresholds.shape[0] + + inp = helper.make_tensor_value_info( + "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels] + ) + outp = helper.make_tensor_value_info( + "outp", TensorProto.FLOAT, num_input_vecs + [NumChannels] + ) + + node_inp_list = ["inp", "thresh"] + + Thresholding_node = helper.make_node( + "Thresholding_Binary_Search", + node_inp_list, + ["outp"], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + NumChannels=NumChannels, + PE=pe, + numSteps=thresholds.shape[1], + inputDataType=input_data_type.name, + weightDataType=input_data_type.name, + outputDataType=output_data_type.name, + numInputVectors=num_input_vecs, + activation_bias=activation_bias, + ) + graph = helper.make_graph( + nodes=[Thresholding_node], + name="thresholding_graph", + inputs=[inp], + outputs=[outp], + ) + + model = helper.make_model(graph, producer_name="thresholding-model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", input_data_type) + model.set_tensor_datatype("outp", output_data_type) + + model.set_tensor_datatype("thresh", input_data_type) + model.set_initializer("thresh", thresholds) + return model + + +def make_single_multithresholding_modelwrapper( + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + num_input_vecs, +): + NumChannels = thresholds.shape[0] + + inp = helper.make_tensor_value_info( + "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels] + ) + outp = helper.make_tensor_value_info( + "outp", 
TensorProto.FLOAT, num_input_vecs + [NumChannels] + ) + + node_inp_list = ["inp", "thresh"] + + Multithresholding_node = helper.make_node( + "MultiThreshold", + node_inp_list, + ["outp"], + domain="qonnx.custom_op.general", + out_dtype=output_data_type.name, + out_bias=float(activation_bias), + out_scale=1.0, + ) + + graph = helper.make_graph( + nodes=[Multithresholding_node], + name="multithresholding_graph", + inputs=[inp], + outputs=[outp], + ) + + model = helper.make_model(graph, producer_name="multithresholding-model") + model = ModelWrapper(model) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(GiveUniqueNodeNames()) + + model.set_tensor_datatype("inp", input_data_type) + model.set_tensor_datatype("outp", output_data_type) + + model.set_tensor_datatype("thresh", input_data_type) + model.set_initializer("thresh", thresholds) + return model + + +@pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]]) +@pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) +@pytest.mark.parametrize("fold", [-1]) +@pytest.mark.parametrize("num_input_channels", [16]) +@pytest.mark.parametrize("mem_mode", ["decoupled", "const"]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +def test_convert_to_hls_tbs_rtl_variant(activation, input_data_type, fold, num_input_channels, mem_mode): + # Handle inputs to the test + pe = generate_pe_value(fold, num_input_channels) + num_steps = activation.get_num_possible_values() - 1 + + # Cppsim is not supported for this node (as it is an RTL node) + if mem_mode == "const": + pytest.skip("const memory mode not supported for RTL Thresholding Binary Search node") + elif mem_mode != "decoupled": + raise Exception("Unknown mem_mode: {}".format(mem_mode)) + + if activation == DataType["BIPOLAR"]: + pytest.skip("Only negative activations are supported for RTL Thresholding Binary Search node") + + # Other non-input parameters + num_input_vecs 
= [1, 2, 2] + output_data_type = activation + if output_data_type == DataType["BIPOLAR"]: + activation_bias = 0 + else: + activation_bias = output_data_type.min() + + # generate random input data + tensor_shape = tuple(num_input_vecs + [num_input_channels]) + x = gen_finn_dt_tensor(input_data_type, tensor_shape) + + # Generate random thresholds and sort in ascending order + thresholds = generate_random_threshold_values( + input_data_type, num_input_channels, num_steps + ) + + # provide non-decreasing/ascending thresholds + thresholds = sort_thresholds_increasing(thresholds) + + x_nhwc = convert_np_array_to_standard_data_layout(x) + y = multithreshold(x_nhwc, thresholds) + + # convert back to NHWC for comparison to hw outputs + y = convert_np_array_to_finn_data_layout(y) + if activation == DataType["BIPOLAR"]: + # binary to bipolar + y = 2 * y - 1 + else: + # signed offset + y += activation.min() + + # Generate model from input parameters to the test + model = make_single_thresholding_binary_search_modelwrapper( + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + num_input_vecs, + ) + + model = model.transform(InsertFIFO(True)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) + + # Retrieve the axilite programming sequence for weights - for decoupled mode only + tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0] + tbs_inst = getCustomOp(tbs_node) + config = tbs_inst.get_dynamic_config(model) + + # Reshape generated data (not from model) + oshape = model.get_tensor_shape("outp") + y_expected = y.reshape(oshape) + + # Helper function that delivers the hook to program the thresholds via AXI-Lite + def config_hook(config): + if config is None: + return None + + def write_thresh_config(sim): + # axi_name = "s_axilite_0_" # works + axi_name = 
getCustomOp( + model.get_nodes_by_op_type("Thresholding_Binary_Search")[0] + ).get_verilog_top_module_intf_names()["axilite"][0] + axi_name += "_0_" + + # Write config registers to the Threshold memory. + # The dictionary defines (addr, value) tuples. + for config_entry in config.values(): + addr = config_entry[0] + val = config_entry[1] + axilite_write(sim, addr, val, basename=axi_name) + + reset_rtlsim(sim) + + return write_thresh_config + + input_dict = {"inp": x} + rtlsim_exec(model, input_dict, pre_hook=config_hook(config)) + y_produced = input_dict["outp"] + assert (y_produced == y_expected).all() + + #### Make a Multithreshold graph and convert to thresholding binary search node + new_model = make_single_multithresholding_modelwrapper( + thresholds, + pe, + input_data_type, + output_data_type, + activation_bias, + num_input_vecs, + ) + + # Recreate the model using the ConvertToHLS transform + new_model = new_model.transform(to_hls.InferThresholdingLayer(mem_mode=mem_mode, use_rtl_variant=True)) + new_model = new_model.transform(InsertFIFO(True)) + new_model = new_model.transform(GiveUniqueNodeNames()) + new_model = new_model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + new_model = new_model.transform(HLSSynthIP()) + new_model = new_model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) + + input_dict = {"inp": x} + rtlsim_exec(new_model, input_dict, pre_hook=config_hook(config)) + y_produced_new = input_dict["outp"] + assert (y_produced_new == y_expected).all() From 11464d87c4857dd2227935c198adbb6115250fe3 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 16:35:32 +0000 Subject: [PATCH 051/111] [thresholding] skip tests with unsupported folding factor input Signed-off-by: Fionn O'Donohoe --- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 6 +++++- .../test_fpgadataflow_thresholding_binary_search.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git 
a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 30932638b6..3b56f40d9c 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -194,7 +194,7 @@ def make_single_multithresholding_modelwrapper( @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]]) @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) -@pytest.mark.parametrize("fold", [-1]) +@pytest.mark.parametrize("fold", [-1, 1, 2]) @pytest.mark.parametrize("num_input_channels", [16]) @pytest.mark.parametrize("mem_mode", ["decoupled", "const"]) @pytest.mark.fpgadataflow @@ -213,6 +213,10 @@ def test_convert_to_hls_tbs_rtl_variant(activation, input_data_type, fold, num_i if activation == DataType["BIPOLAR"]: pytest.skip("Only negative activations are supported for RTL Thresholding Binary Search node") + # Paralellisation not supported for thresholding binary search rtl node + if pe != 1: + pytest.skip("Paralellisation of IP not supported for RTL Thresholding Binary Search node") + # Other non-input parameters num_input_vecs = [1, 2, 2] output_data_type = activation diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 7ef5da8f23..0be91a2569 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -191,7 +191,7 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): # N.B. - fold factor of '-1' is supported only (no PE/SIMD support) @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]]) @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) -@pytest.mark.parametrize("fold", [-1]) # 1, 2, etc. 
will fail +@pytest.mark.parametrize("fold", [-1, 1, 2]) @pytest.mark.parametrize("num_input_channels", [16]) @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.fpgadataflow @@ -204,6 +204,10 @@ def test_fpgadataflow_thresholding_binary_search( pe = generate_pe_value(fold, num_input_channels) num_steps = activation.get_num_possible_values() - 1 + # Paralellisation not supported for thresholding binary search rtl node + if pe != 1: + pytest.skip("Paralellisation of IP not supported for RTL Thresholding Binary Search node") + # Cppsim is not supported for this node (as it is an RTL node) if exec_mode == "cppsim": pytest.skip("cppsim not supported for RTL Thresholding Binary Search node") From e71b1c0e1487befd8ec04ac6ebcc0caf8d63b4a3 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 16:45:16 +0000 Subject: [PATCH 052/111] [thresholding] add comments for attributes Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index c342d235d9..711e3a8270 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -88,8 +88,12 @@ def get_nodeattr_types(self): # [4] is four vectors (like a FC layer with batch=4) # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) "numInputVectors": ("ints", False, [1]), + # name of the top module in verilog template. 
Used by PyVerilator + # and IPI generation "gen_top_module": ("s", False, ""), + # bias to be applied to outputs of the node "activation_bias": ("i", False, 0), + # used for IPI step "clkFreq": ("i", False, 200000000), } my_attrs.update(super().get_nodeattr_types()) From 3be1140fe68058c55fc1e3685609b6964ce7e993 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 17:01:05 +0000 Subject: [PATCH 053/111] [thresholding] replace min() with signed() function Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 711e3a8270..2073e95b41 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -393,7 +393,7 @@ def prepare_codegen_rtl_values(self): # Is the input datatype signed or unsigned? 
# The thresholding core needs to know this when comparing weights to inputs - if self.get_input_datatype().min() < 0: + if self.get_input_datatype().signed(): code_gen_dict["$SIGN$"] = ["signed"] else: code_gen_dict["$SIGN$"] = ["unsigned"] From e05effc20cd2e357f5bba38d2e320144b313c9f5 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 17:40:42 +0000 Subject: [PATCH 054/111] [thresholding] fix formatting from pre-commit Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 13 +++++++++-- .../fpgadataflow/convert_to_hls_layers.py | 6 ++--- src/finn/util/basic.py | 1 + .../test_convert_to_hls_thresholding.py | 22 ++++++++++++++----- ...fpgadataflow_thresholding_binary_search.py | 4 +++- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 2073e95b41..d5d5c48cce 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -38,7 +38,7 @@ ) from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp -from finn.util.basic import get_rtlsim_trace_depth, make_build_dir, find_next_power_of_2 +from finn.util.basic import find_next_power_of_2, get_rtlsim_trace_depth, make_build_dir from finn.util.data_packing import ( npy_to_rtlsim_input, pack_innermost_dim_as_hex_string, @@ -671,7 +671,16 @@ def get_dynamic_config(self, model, address_stride=1): ) config[key_name] = ( channel_start_addr + addr, - int(str(pack_innermost_dim_as_hex_string([weight], self.get_weight_datatype(), self.get_weight_datatype().bitwidth())), 0), + int( + str( + pack_innermost_dim_as_hex_string( + [weight], + self.get_weight_datatype(), + self.get_weight_datatype().bitwidth(), + ) + ), + 0, + ), ) weight_cntr += 1 diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py 
b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index 17f839c5c5..a0461bda82 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -1167,10 +1167,10 @@ def apply(self, model): else: if self.use_rtl_variant: warnings.warn( - """%s : RTL Thresholding requested for unsupported + """%s : RTL Thresholding requested for unsupported configuration. Falling back to HLS implementation.""" - % node.name - ) + % node.name + ) # create and insert new Thresholding_Batch node new_node = helper.make_node( diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 9a66cf90eb..8782bd7f8c 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -217,6 +217,7 @@ def is_exe(fpath): return None + def find_next_power_of_2(n): # Negative values will loop infinitely below - return 0 if n <= 0: diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 3b56f40d9c..d0502a9b74 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -40,8 +40,8 @@ from qonnx.transformation.infer_shapes import InferShapes from qonnx.util.basic import gen_finn_dt_tensor -from finn.core.rtlsim_exec import rtlsim_exec import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +from finn.core.rtlsim_exec import rtlsim_exec from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP @@ -199,23 +199,31 @@ def make_single_multithresholding_modelwrapper( @pytest.mark.parametrize("mem_mode", ["decoupled", "const"]) @pytest.mark.fpgadataflow @pytest.mark.vivado -def test_convert_to_hls_tbs_rtl_variant(activation, input_data_type, fold, num_input_channels, mem_mode): +def 
test_convert_to_hls_tbs_rtl_variant( + activation, input_data_type, fold, num_input_channels, mem_mode +): # Handle inputs to the test pe = generate_pe_value(fold, num_input_channels) num_steps = activation.get_num_possible_values() - 1 # Cppsim is not supported for this node (as it is an RTL node) if mem_mode == "const": - pytest.skip("const memory mode not supported for RTL Thresholding Binary Search node") + pytest.skip( + "const memory mode not supported for RTL Thresholding Binary Search node" + ) elif mem_mode != "decoupled": raise Exception("Unknown mem_mode: {}".format(mem_mode)) if activation == DataType["BIPOLAR"]: - pytest.skip("Only negative activations are supported for RTL Thresholding Binary Search node") + pytest.skip( + "Only negative activations are supported for RTL Thresholding Binary Search node" + ) # Paralellisation not supported for thresholding binary search rtl node if pe != 1: - pytest.skip("Paralellisation of IP not supported for RTL Thresholding Binary Search node") + pytest.skip( + "Paralellisation of IP not supported for RTL Thresholding Binary Search node" + ) # Other non-input parameters num_input_vecs = [1, 2, 2] @@ -313,7 +321,9 @@ def write_thresh_config(sim): ) # Recreate the model using the ConvertToHLS transform - new_model = new_model.transform(to_hls.InferThresholdingLayer(mem_mode=mem_mode, use_rtl_variant=True)) + new_model = new_model.transform( + to_hls.InferThresholdingLayer(mem_mode=mem_mode, use_rtl_variant=True) + ) new_model = new_model.transform(InsertFIFO(True)) new_model = new_model.transform(GiveUniqueNodeNames()) new_model = new_model.transform(PrepareIP(test_fpga_part, target_clk_ns)) diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 0be91a2569..f1a03a3a89 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py 
@@ -206,7 +206,9 @@ def test_fpgadataflow_thresholding_binary_search( # Paralellisation not supported for thresholding binary search rtl node if pe != 1: - pytest.skip("Paralellisation of IP not supported for RTL Thresholding Binary Search node") + pytest.skip( + "Paralellisation of IP not supported for RTL Thresholding Binary Search node" + ) # Cppsim is not supported for this node (as it is an RTL node) if exec_mode == "cppsim": From 48c33042bbc7b17f98510a8299504e4d36c3a2e8 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 17:47:16 +0000 Subject: [PATCH 055/111] [thresholding] fix more flake8 formatting Signed-off-by: Fionn O'Donohoe --- .../fpgadataflow/convert_to_hls_layers.py | 4 ++-- .../fpgadataflow/test_convert_to_hls_thresholding.py | 12 +++++------- .../test_fpgadataflow_thresholding_binary_search.py | 5 ++--- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index a0461bda82..f6dd466fab 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -1157,7 +1157,7 @@ def apply(self, model): PE=pe, numSteps=thl_thres_shape[1], inputDataType=idt.name, - weightDataType=idt.name, # will be set by MinimizeAccumulatorWidth + weightDataType=idt.name, outputDataType=odt.name, numInputVectors=list(thl_in_shape[:-1]), activation_bias=actval, @@ -1183,7 +1183,7 @@ def apply(self, model): PE=pe, numSteps=thl_thres_shape[1], inputDataType=idt.name, - weightDataType=idt.name, # will be set by MinimizeAccumulatorWidth + weightDataType=idt.name, outputDataType=odt.name, numInputVectors=list(thl_in_shape[:-1]), ActVal=actval, diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index d0502a9b74..2785d91617 100755 --- 
a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -42,14 +42,10 @@ import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls from finn.core.rtlsim_exec import rtlsim_exec -from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO -from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim -from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 @@ -209,14 +205,16 @@ def test_convert_to_hls_tbs_rtl_variant( # Cppsim is not supported for this node (as it is an RTL node) if mem_mode == "const": pytest.skip( - "const memory mode not supported for RTL Thresholding Binary Search node" + "const memory mode not supported for " \ + "RTL Thresholding Binary Search node" ) elif mem_mode != "decoupled": raise Exception("Unknown mem_mode: {}".format(mem_mode)) if activation == DataType["BIPOLAR"]: pytest.skip( - "Only negative activations are supported for RTL Thresholding Binary Search node" + "Only negative activations are supported for " \ + "RTL Thresholding Binary Search node" ) # Paralellisation not supported for thresholding binary search rtl node @@ -310,7 +308,7 @@ def write_thresh_config(sim): y_produced = input_dict["outp"] assert (y_produced == y_expected).all() - #### Make a Multithreshold graph and convert to thresholding binary search node + # Make a Multithreshold graph and convert to thresholding binary search node new_model = make_single_multithresholding_modelwrapper( thresholds, pe, diff --git 
a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index f1a03a3a89..a4eab1e181 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -39,11 +39,9 @@ from qonnx.util.basic import gen_finn_dt_tensor from finn.core.rtlsim_exec import rtlsim_exec -from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO -from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode @@ -207,7 +205,8 @@ def test_fpgadataflow_thresholding_binary_search( # Paralellisation not supported for thresholding binary search rtl node if pe != 1: pytest.skip( - "Paralellisation of IP not supported for RTL Thresholding Binary Search node" + "Paralellisation of IP not supported for " \ + "RTL Thresholding Binary Search node" ) # Cppsim is not supported for this node (as it is an RTL node) From 1e8a36ca3712100caeed506976a92c7e2ee4b4c4 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 17:55:20 +0000 Subject: [PATCH 056/111] [thresholding] remove backslashes for flake8 Signed-off-by: Fionn O'Donohoe --- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 4 ++-- .../test_fpgadataflow_thresholding_binary_search.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 2785d91617..217ee39d74 100755 --- 
a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -205,7 +205,7 @@ def test_convert_to_hls_tbs_rtl_variant( # Cppsim is not supported for this node (as it is an RTL node) if mem_mode == "const": pytest.skip( - "const memory mode not supported for " \ + "const memory mode not supported for " "RTL Thresholding Binary Search node" ) elif mem_mode != "decoupled": @@ -213,7 +213,7 @@ def test_convert_to_hls_tbs_rtl_variant( if activation == DataType["BIPOLAR"]: pytest.skip( - "Only negative activations are supported for " \ + "Only negative activations are supported for " "RTL Thresholding Binary Search node" ) diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index a4eab1e181..049d65835f 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -205,7 +205,7 @@ def test_fpgadataflow_thresholding_binary_search( # Paralellisation not supported for thresholding binary search rtl node if pe != 1: pytest.skip( - "Paralellisation of IP not supported for " \ + "Paralellisation of IP not supported for " "RTL Thresholding Binary Search node" ) From 08f1b5f49e0d5180fa739056209bc5f0a8589c7e Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 18:00:57 +0000 Subject: [PATCH 057/111] [thresholding] more flake8 fixes Signed-off-by: Fionn O'Donohoe --- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 217ee39d74..45705dc833 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -205,8 +205,7 @@ def test_convert_to_hls_tbs_rtl_variant( # 
Cppsim is not supported for this node (as it is an RTL node) if mem_mode == "const": pytest.skip( - "const memory mode not supported for " - "RTL Thresholding Binary Search node" + "const memory mode not supported for this node" ) elif mem_mode != "decoupled": raise Exception("Unknown mem_mode: {}".format(mem_mode)) From 481d773257e41ad04f2bb5e1b614decfac4312ab Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 18:02:55 +0000 Subject: [PATCH 058/111] [thresholding] undo flake8 fixes Signed-off-by: Fionn O'Donohoe --- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 45705dc833..cee06ebec9 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -204,9 +204,7 @@ def test_convert_to_hls_tbs_rtl_variant( # Cppsim is not supported for this node (as it is an RTL node) if mem_mode == "const": - pytest.skip( - "const memory mode not supported for this node" - ) + pytest.skip("const memory mode not supported for this node") elif mem_mode != "decoupled": raise Exception("Unknown mem_mode: {}".format(mem_mode)) From a51bef4e3ea906b056eb7fb3fbb114a2ae12b6aa Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 5 Jan 2023 18:04:59 +0000 Subject: [PATCH 059/111] [thresholding] another flake8 fix Signed-off-by: Fionn O'Donohoe --- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index cee06ebec9..07821983e1 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -217,7 +217,7 @@ def test_convert_to_hls_tbs_rtl_variant( # Paralellisation not 
supported for thresholding binary search rtl node if pe != 1: pytest.skip( - "Paralellisation of IP not supported for RTL Thresholding Binary Search node" + "Paralellisation not supported for RTL Thresholding Binary Search node" ) # Other non-input parameters From 2c313ad01465f66a9e6f367cf6552f64b6a1dab3 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 6 Jan 2023 11:11:25 +0000 Subject: [PATCH 060/111] [thresholding] remove cppsim test file generation Signed-off-by: Fionn O'Donohoe --- .../custom_op/fpgadataflow/thresholding_binary_search.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index d5d5c48cce..1a5faad72a 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -239,13 +239,6 @@ def get_hls_compatible_threshold_tensor(self, orig_thres_matrix): np.mod(orig_thres_matrix, 1), 0 ).all(), "Need int threshold tensor" ret = orig_thres_matrix - # workaround for vivado_hls threshold bug - if ret[0][0] == 0 and n_thres_steps == 1: - ret = np.copy(ret) - ret[0][0] = 1 - warnings.warn( - "Setting 0-valued first threshold to 1 to avoid vivado_hls bug" - ) # ensure channels = mh , duplicating if necessary if ret.shape[0] == 1: ret = np.tile(ret, (mh, 1)) From 49bdd28e4edc3d47ccb57161e073fcde2a2cb216 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 6 Jan 2023 11:14:22 +0000 Subject: [PATCH 061/111] [thresholding] remove unnecessary data generation functions for simulators Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 107 ------------------ 1 file changed, 107 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 1a5faad72a..7b37b2029a 100755 --- 
a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -261,84 +261,6 @@ def get_hls_compatible_threshold_tensor(self, orig_thres_matrix): rows between PEs is not as expected (n_thres_steps)""" return ret.reshape(1, pe, tmem, n_thres_steps) - def make_weight_file(self, weights, weight_file_mode, weight_file_name): - """Produce a file containing given weights (thresholds) in appropriate - format for this layer. This file can be used for either synthesis or - run-time reconfig of weights. - - Arguments: - * weights : numpy array with weights to be put into the file - * weight_file_mode : one of {hls_header, decoupled_verilog_dat, - decoupled_runtime} - * weight_file_name : filename for the weight file to be generated - """ - # There are 'decoupled_*' flavors, just make sure that the flavors - # are decoupled related - if "decoupled" not in weight_file_mode: - raise Exception( - "Unrecognized memory mode for this node: {}".format(weight_file_mode) - ) - - threshold_tensor = self.get_hls_compatible_threshold_tensor(weights) - tdt = self.get_weight_datatype() - assert np.vectorize(tdt.allowed)( - threshold_tensor - ).all(), "Thresholds can't be expressed with type %s" % str(tdt) - - # streaming thresholds need to be organized differently - # (1, pe, tmem, n_thres_steps) -> (1, tmem, pe, n_thres_steps) - decoupled_thres = np.transpose(threshold_tensor, (0, 2, 1, 3)) - # (1, tmem, pe, n_thres_steps) -(1, tmem, pe * n_thres_steps) - pe = self.get_nodeattr("PE") - n_thres_steps = self.get_nodeattr("numSteps") - decoupled_thres_pe_flipped = np.flip(decoupled_thres, axis=-2) - decoupled_thres = decoupled_thres.reshape(1, -1, pe * n_thres_steps) - decoupled_thres = decoupled_thres.copy() - decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.reshape( - 1, -1, pe * n_thres_steps - ) - decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.copy() - - if weight_file_mode == "decoupled_npy": - # 
save weight stream into npy for cppsim - np.save(weight_file_name, decoupled_thres) - elif weight_file_mode == "decoupled_verilog_dat": - # convert weight values into hexstring - weight_width = self.get_weightstream_width() - # pad to nearest 4 bits to get hex strings - weight_width_padded = roundup_to_integer_multiple(weight_width, 4) - weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string( - decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix="" - ) - weight_stream = weight_tensor_pe_flipped.flatten() - weight_stream = weight_stream.copy() - with open(weight_file_name, "w") as f: - for val in weight_stream: - f.write(val + "\n") - elif weight_file_mode == "decoupled_runtime": - # memstream axi-lite interface will map each mem line to - # one or multiple 32-bit words - weight_width = self.get_weightstream_width() - words_per_memwidth = 2 ** ceil(log2(weight_width / 32)) - if words_per_memwidth < 1: - words_per_memwidth = 1 - weight_width_padded = words_per_memwidth * 32 - # first, pack and ensure padding to 32 bits - weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string( - decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix="" - ) - weight_stream = weight_tensor_pe_flipped.flatten() - weight_stream = weight_stream.copy() - with open(weight_file_name, "w") as f: - for val in weight_stream: - # split into groups of 8 hex digits (= 32 bits) - words_32b = textwrap.wrap(val, 8) - words_32b.reverse() - for word_32b in words_32b: - f.write(word_32b + "\n") - else: - raise Exception("Decoupled weight export not yet implemented") - # Get the integer from the DataType and string-ify it # This assumes that the data is in the form "INTx" or similar def conv_datatype_to_str(self, data_type): @@ -449,35 +371,6 @@ def code_generation_ipgen(self, model, fpgapart, clk): code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") self.set_nodeattr("ipgen_path", code_gen_dir) self.set_nodeattr("ip_path", code_gen_dir) - - # Generate params for RTLSim - 
self.generate_params(model, code_gen_dir) - - def generate_params(self, model, path): - code_gen_dir = path - weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir) - thresholds = model.get_initializer(self.onnx_node.input[1]) - self.make_weight_file(thresholds, "decoupled_npy", weight_filename_sim) - - # Verilog.dat thresholds: - # also save weights as Verilog .dat file - # note that we provide two different .dat files, one for synth - # and one for synthesis. this is because URAM-based weights always - # need zero weights for synthesis, otherwise they get inferred - # as BRAM - weight_filename_rtl_synth = "{}/memblock_synth_0.dat".format(code_gen_dir) - weight_filename_rtl_sim = "{}/memblock_sim_0.dat".format(code_gen_dir) - # sim weights are always the true weights - self.make_weight_file( - thresholds, "decoupled_verilog_dat", weight_filename_rtl_sim - ) - - # Synthesis thresholds: - synth_thresholds = thresholds - self.make_weight_file( - synth_thresholds, "decoupled_verilog_dat", weight_filename_rtl_synth - ) - return def prepare_rtlsim(self): From e663030e98dc6c1f194ccec1d8e5d65b9599c19c Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 6 Jan 2023 11:27:46 +0000 Subject: [PATCH 062/111] [thresholding] remove potentially problematic helper function Signed-off-by: Fionn O'Donohoe --- .../thresholding_binary_search.py | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 7b37b2029a..b14eaa1669 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -261,14 +261,6 @@ def get_hls_compatible_threshold_tensor(self, orig_thres_matrix): rows between PEs is not as expected (n_thres_steps)""" return ret.reshape(1, pe, tmem, n_thres_steps) - # Get the integer from the DataType and string-ify it - # This 
assumes that the data is in the form "INTx" or similar - def conv_datatype_to_str(self, data_type): - # Handle the case that an int is passed to the function - if isinstance(data_type, int): - return str(data_type) - return str(DataType[data_type].bitwidth()) - def prepare_codegen_rtl_values(self): """All dictionary values produced in this function are to replace their key value(s) in the RTL template files""" @@ -294,16 +286,16 @@ def prepare_codegen_rtl_values(self): bias = self.get_nodeattr("activation_bias") # activation bias value code_gen_dict["$N$"] = [ - self.conv_datatype_to_str(output_data_type) - ] # output precision + str(DataType[output_data_type].bitwidth()) + ] # output precision - convert bitwidth to string code_gen_dict["$M$"] = [ - self.conv_datatype_to_str(input_data_type) - ] # input/threshold precision + str(DataType[input_data_type].bitwidth()) + ] # input/threshold precision - convert bitwidth to string code_gen_dict["$C$"] = [ - self.conv_datatype_to_str(num_channels) + str(num_channels) ] # number of channels code_gen_dict["$BIAS$"] = [ - self.conv_datatype_to_str(bias) + str(bias) ] # activation bias value # Is the input datatype signed or unsigned? 
From 42dbf23938fdd1a302e88706302980c718a66d05 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 6 Jan 2023 11:35:36 +0000 Subject: [PATCH 063/111] [thresholding] implement flake8 formatting Signed-off-by: Fionn O'Donohoe --- .../custom_op/fpgadataflow/thresholding_binary_search.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index b14eaa1669..6dc9130792 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -291,12 +291,8 @@ def prepare_codegen_rtl_values(self): code_gen_dict["$M$"] = [ str(DataType[input_data_type].bitwidth()) ] # input/threshold precision - convert bitwidth to string - code_gen_dict["$C$"] = [ - str(num_channels) - ] # number of channels - code_gen_dict["$BIAS$"] = [ - str(bias) - ] # activation bias value + code_gen_dict["$C$"] = [str(num_channels)] # number of channels + code_gen_dict["$BIAS$"] = [str(bias)] # activation bias value # Is the input datatype signed or unsigned? 
# The thresholding core needs to know this when comparing weights to inputs From 933d7476d3336a6aec9c4dea852acb25ebdf4b46 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 6 Jan 2023 11:38:11 +0000 Subject: [PATCH 064/111] [thresholding] remove unused imports Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 6dc9130792..9e1dd454f1 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -28,9 +28,7 @@ import numpy as np import os -import textwrap import warnings -from math import ceil, log2 from qonnx.core.datatype import DataType from qonnx.util.basic import ( interleave_matrix_outer_dim_from_partitions, From 5c6dcd9b89a7b35328676855c5c5ac13e06da90f Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 6 Jan 2023 11:40:13 +0000 Subject: [PATCH 065/111] [thresholding] remove last unused import Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 9e1dd454f1..a2e0f404b2 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -32,7 +32,6 @@ from qonnx.core.datatype import DataType from qonnx.util.basic import ( interleave_matrix_outer_dim_from_partitions, - roundup_to_integer_multiple, ) from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp From 51acd119eb8864ff302d0f040fcb0307c2778ccf Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 6 Jan 2023 11:42:12 +0000 Subject: [PATCH 066/111] [thresholding] reformat 
existing import Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index a2e0f404b2..595a643acc 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -30,9 +30,7 @@ import os import warnings from qonnx.core.datatype import DataType -from qonnx.util.basic import ( - interleave_matrix_outer_dim_from_partitions, -) +from qonnx.util.basic import interleave_matrix_outer_dim_from_partitions from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.util.basic import find_next_power_of_2, get_rtlsim_trace_depth, make_build_dir From b886a5ae08e608808795bc584da0650eb8ff260f Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 18 Jan 2023 11:25:51 +0000 Subject: [PATCH 067/111] [Docs] Add bin search thresholding to docs generation --- docs/finn/source_code/finn.custom_op.fpgadataflow.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst index fdcf44c6d9..3627855cfb 100644 --- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst +++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst @@ -203,6 +203,14 @@ finn.custom\_op.fpgadataflow.thresholding\_batch :undoc-members: :show-inheritance: +finn.custom\_op.fpgadataflow.thresholding\_binary\_search +----------------------------------------------------------- + +.. 
automodule:: finn.custom_op.fpgadataflow.thresholding_binary_search + :members: + :undoc-members: + :show-inheritance: + finn.custom\_op.fpgadataflow.tlastmarker ----------------------------------------------- From 2c3de2ab7ad12c89ee4af52e611532ff4255e258 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Mon, 23 Jan 2023 07:28:39 +0000 Subject: [PATCH 068/111] Corrected address width in Verilog wrapper for thresholding. --- finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index e3f8596bc8..5068cb549c 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -51,7 +51,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #( // Writing input s_axilite_AWVALID, output s_axilite_AWREADY, - input [C_BITS+N-1:0] s_axilite_AWADDR, + input [C_BITS+N+1:0] s_axilite_AWADDR, input s_axilite_WVALID, output s_axilite_WREADY, From 7c9f5d8805b288a299cd1970d797af0d24327577 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 23 Jan 2023 11:57:16 +0000 Subject: [PATCH 069/111] [thresholding] remove bug affecting input width in top level wrapper The C_BITS parameter is calculating the correct width needed for the top level wrapper for the thresholding binary search IP. However, the parameter is not 'synthesizing' correctly and does not update the width for the affected s_axilite_AWADDR signal. This results in the MSBs of the input signal being truncated. These missing bits affected addressing when writing weights into the core. Weights were written to the incorrect addresses in the core causing incorrect thresholding to occur. 
Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index 5068cb549c..768e7b6a5b 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -38,7 +38,6 @@ module $MODULE_NAME_AXI_WRAPPER$ #( parameter C = $C$, // Channels int BIAS = $BIAS$, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) - parameter C_BITS = C < 2 ? 1 : $clog2(C), parameter O_BITS = BIAS > 0? /* unsigned */ $clog2(2**N-BIAS) : /* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS) @@ -49,9 +48,9 @@ module $MODULE_NAME_AXI_WRAPPER$ #( //- AXI Lite ------------------------ // Writing - input s_axilite_AWVALID, - output s_axilite_AWREADY, - input [C_BITS+N+1:0] s_axilite_AWADDR, + input s_axilite_AWVALID, + output s_axilite_AWREADY, + input [$clog2(C)+N+1:0] s_axilite_AWADDR, input s_axilite_WVALID, output s_axilite_WREADY, From 3a0d59dd6717daedb043ea83d6873e6c663b0d06 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 23 Jan 2023 11:59:17 +0000 Subject: [PATCH 070/111] [thresholding] adjust thresholding binary search tests to use word addressing for programming thresholds with axilite Signed-off-by: Fionn O'Donohoe --- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 2 +- .../test_fpgadataflow_thresholding_binary_search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 07821983e1..9486513402 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -271,7 +271,7 @@ def test_convert_to_hls_tbs_rtl_variant( # Retrieve the axilite programming sequence for weights - 
for decoupled mode only tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0] tbs_inst = getCustomOp(tbs_node) - config = tbs_inst.get_dynamic_config(model) + config = tbs_inst.get_dynamic_config(model, 4) # Reshape generated data (not from model) oshape = model.get_tensor_shape("outp") diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 049d65835f..2a34971f0d 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -266,7 +266,7 @@ def test_fpgadataflow_thresholding_binary_search( # Retrieve the axilite programming sequence for weights - for decoupled mode only tbs_node = model.get_nodes_by_op_type("Thresholding_Binary_Search")[0] tbs_inst = getCustomOp(tbs_node) - config = tbs_inst.get_dynamic_config(model) + config = tbs_inst.get_dynamic_config(model, 4) # Reshape generated data (not from model) oshape = model.get_tensor_shape("outp") From 757e3a1398948878e866f4fe5fe1747206a1c7d9 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 23 Jan 2023 12:05:40 +0000 Subject: [PATCH 071/111] [thresholding] adjust typo in exception Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 595a643acc..f2f9e133b2 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -421,7 +421,7 @@ def execute_node(self, context, graph): reshaped_input, ) elif in_ind > 2: - raise Exception("Unexpected input found for Thresholding_Batch") + raise Exception("Unexpected input found for Thresholding_Binary_Search") in_ind += 1 # Create a PyVerilator 
wrapper of the RTLSim .so From 479575b224559680c559c7af5fd4f09582529919 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 23 Jan 2023 12:07:22 +0000 Subject: [PATCH 072/111] [thresholding] undo copyright header change - only needed for new files Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 0e17726d48..dc9a5a349a 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022, Advanced Micro Devices, Inc. +# Copyright (c) 2020, Xilinx # All rights reserved. # # Redistribution and use in source and binary forms, with or without From 0d99b6c8ed358b2feea41cc8af242d40b30c8d97 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 23 Jan 2023 12:54:19 +0000 Subject: [PATCH 073/111] [thresholding] add docstring for migrated find_next_power_of_2() function Signed-off-by: Fionn O'Donohoe --- src/finn/util/basic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 8782bd7f8c..ee185aa94f 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -219,6 +219,7 @@ def is_exe(fpath): def find_next_power_of_2(n): + """For any integer 'n', find the next greatest power of 2""" # Negative values will loop infinitely below - return 0 if n <= 0: return 0 From 5a77a326558de1ecd59e61aae38575b73ac54b1b Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 23 Jan 2023 12:55:40 +0000 Subject: [PATCH 074/111] [thresholding] add docstrings for methods not in base class Signed-off-by: Fionn O'Donohoe --- .../fpgadataflow/thresholding_binary_search.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 
f2f9e133b2..7dfcd91d58 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -95,6 +95,7 @@ def get_nodeattr_types(self): return my_attrs def calc_tmem(self): + """Calculates and returns TMEM.""" num_channels = self.get_nodeattr("NumChannels") pe = self.get_nodeattr("PE") return num_channels // pe @@ -104,6 +105,8 @@ def make_shape_compatible_op(self, model): return super().make_const_shape_op(oshape) def infer_node_datatype(self, model): + """Used for FINN DataType inference: set the output tensors' datatypes + accordingly for this node""" node = self.onnx_node idt = model.get_tensor_datatype(node.input[0]) if idt != self.get_input_datatype(): @@ -119,6 +122,8 @@ def infer_node_datatype(self, model): model.set_tensor_datatype(node.output[0], odt) def verify_node(self): + """Required by the FINN nalysis module. Checks if custom ops in graph + are correctly built, with all attributes and inputs.""" return [] def bram_estimation(self): @@ -170,6 +175,7 @@ def get_outstream_width(self, ind=0): return o_bits * self.get_nodeattr("PE") def get_weightstream_width(self): + """Returns weight stream width""" pe = self.get_nodeattr("PE") wp = self.get_weight_datatype().bitwidth() n_thres_steps = self.get_nodeattr("numSteps") @@ -299,20 +305,24 @@ def prepare_codegen_rtl_values(self): return code_gen_dict def get_rtl_file_list(self): + """Thresholding binary search RTL file list""" return ["thresholding.sv", "thresholding_axi.sv", "thresholding_axi_wrapper.v"] def get_rtl_file_paths(self): + """Get full path of all RTL files""" rtl_root_dir = os.environ["FINN_ROOT"] + "/finn-rtllib/thresholding/hdl/" rtl_file_list = self.get_rtl_file_list() rtl_file_paths = [rtl_root_dir + file for file in rtl_file_list] return rtl_file_paths def get_rtl_template_data(self, path): + """Return RTL file contents as a template""" with open(path, "r") as f: template = f.read() return template def 
fill_in_rtl_template_data(self, replace_dict, template_data): + """Use attribute values to finn in RTL template placeholders""" template_data_cp = template_data for key in replace_dict: replacement_line = "\n".join(replace_dict[key]) @@ -320,11 +330,13 @@ def fill_in_rtl_template_data(self, replace_dict, template_data): return template_data_cp def dump_rtl_data(self, dest_dir, filename, data): + """Dump filled-in-template RTL files for future synthesis step""" with open(os.path.join(dest_dir, filename), "w") as f: f.write(data) return def generate_hdl(self): + """Prepare HDL files from templates for synthesis""" # Generate a dictionary of values to put in RTL template code_gen_dict = self.prepare_codegen_rtl_values() From eeed0702125de77c293a4a702f213a1035829179 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Mon, 23 Jan 2023 12:56:22 +0000 Subject: [PATCH 075/111] [thresholding] remove unused method Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 7dfcd91d58..94182b4ea0 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -209,9 +209,6 @@ def get_number_output_values(self): def get_exp_cycles(self): return 0 - def get_template_param_values(self): - return dict() - def get_hls_compatible_threshold_tensor(self, orig_thres_matrix): """Convert the original numpy weight matrix orig_weight_matrix into a form suitable for passing to the hlslib call: From c2708686e22c9eaff18a5314c2f470fbbcb819f0 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 27 Jan 2023 14:55:16 +0000 Subject: [PATCH 076/111] [thresholding] remove 'return' at end of function - not needed Signed-off-by: Fionn O'Donohoe --- tests/util/test_basic.py | 2 -- 1 file changed, 2 
deletions(-) diff --git a/tests/util/test_basic.py b/tests/util/test_basic.py index d2586f4f19..97a8c50261 100755 --- a/tests/util/test_basic.py +++ b/tests/util/test_basic.py @@ -58,5 +58,3 @@ def test_next_power_of_2(): output = basic.find_next_power_of_2(test_dict["input"]) assert output >= test_dict["input"] assert output == test_dict["expected_result"] - - return From af22177e50ae808072d87a9d0c5260ccb6c3a67f Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 27 Jan 2023 14:59:48 +0000 Subject: [PATCH 077/111] [thresholding] remove cppsim exec_mode from test - not exercised Signed-off-by: Fionn O'Donohoe --- .../test_fpgadataflow_thresholding_binary_search.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index 2a34971f0d..e57c4942c8 100755 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -191,12 +191,11 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) @pytest.mark.parametrize("fold", [-1, 1, 2]) @pytest.mark.parametrize("num_input_channels", [16]) -@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_fpgadataflow_thresholding_binary_search( - activation, input_data_type, fold, num_input_channels, exec_mode + activation, input_data_type, fold, num_input_channels ): # Handle inputs to the test pe = generate_pe_value(fold, num_input_channels) @@ -209,12 +208,6 @@ def test_fpgadataflow_thresholding_binary_search( "RTL Thresholding Binary Search node" ) - # Cppsim is not supported for this node (as it is an RTL node) - if exec_mode == "cppsim": - pytest.skip("cppsim not supported for RTL Thresholding Binary Search 
node") - elif exec_mode != "rtlsim": - raise Exception("Unknown exec_mode: {}".format(exec_mode)) - # Other non-input parameters num_input_vecs = [1, 2, 2] output_data_type = activation From fab120b8218b2bacf8a94a23c7d250d0c5df12b6 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 27 Jan 2023 15:02:38 +0000 Subject: [PATCH 078/111] [thresholding] remove unused attributes Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 94182b4ea0..43ae8e8233 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -75,9 +75,6 @@ def get_nodeattr_types(self): "inputDataType": ("s", True, ""), "weightDataType": ("s", True, ""), "outputDataType": ("s", True, ""), - # input and output FIFO depths - "inFIFODepth": ("i", False, 0), - "outFIFODepth": ("i", False, 0), # number of input vectors, examples: # [1] is a single vector (like a FC layer with batch=1) # [4] is four vectors (like a FC layer with batch=4) From 5d6c964443e0c41865a18e862830e0c27a307dd1 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 27 Jan 2023 15:47:41 +0000 Subject: [PATCH 079/111] [thresholding] adjust i/o port names on thresholding RTL wrapper Originally s_axis and m_axis port names on the thresholding RTL wrapper could synthesise, but did not adhere to the FINN i/o signal naming convention. The FINN compiler would not recognise the IP being synthesised and would rely on the IP having the correct IP/signal wiring steps in place. The FINN compiler did not recognise s_axis/m_axis signal naming and therefore did not automatically set the clock frequency of the IP to match the rest of the network. 
This required a Tcl command to set the clock frequency of the IP, as well as a user-configurable attribute to set the clock frequency for ease-of-use. It turns out that this actually reduces user ease-of-use. Having the compiler take care of the clock signalling is preferred. To do this, the s_axis/m_axis signals are renamed to in0_V/out_V, as the compiler expects, and this extra 'user configurability' can therefore be removed. Signed-off-by: Fionn O'Donohoe --- .../hdl/thresholding_axi_wrapper.v | 26 ++++++++++--------- .../thresholding_binary_search.py | 19 -------------- 2 files changed, 14 insertions(+), 31 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index 768e7b6a5b..c16bf264dd 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -43,7 +43,9 @@ module $MODULE_NAME_AXI_WRAPPER$ #( /* signed */ 1+$clog2(BIAS >= 2**(N-1)? 
BIAS : 2**N-BIAS) )( //- Global Control ------------------ + (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) input ap_clk, + (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) input ap_rst_n, //- AXI Lite ------------------------ @@ -72,14 +74,14 @@ module $MODULE_NAME_AXI_WRAPPER$ #( output [ 1:0] s_axilite_RRESP, //- AXI Stream - Input -------------- - output s_axis_tready, - input s_axis_tvalid, - input [((M+7)/8)*8-1:0] s_axis_tdata, + output in0_V_TREADY, + input in0_V_TVALID, + input [((M+7)/8)*8-1:0] in0_V_TDATA, //- AXI Stream - Output ------------- - input m_axis_tready, - output m_axis_tvalid, - output [((O_BITS+7)/8)*8-1:0] m_axis_tdata + input out_V_TREADY, + output out_V_TVALID, + output [((O_BITS+7)/8)*8-1:0] out_V_TDATA ); $MODULE_NAME_AXI$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst ( @@ -113,14 +115,14 @@ module $MODULE_NAME_AXI_WRAPPER$ #( .s_axilite_RRESP(s_axilite_RRESP), //- AXI Stream - Input -------------- - .s_axis_tready(s_axis_tready), - .s_axis_tvalid(s_axis_tvalid), - .s_axis_tdata(s_axis_tdata), + .s_axis_tready(in0_V_TREADY), + .s_axis_tvalid(in0_V_TVALID), + .s_axis_tdata(in0_V_TDATA), //- AXI Stream - Output ------------- - .m_axis_tready(m_axis_tready), - .m_axis_tvalid(m_axis_tvalid), - .m_axis_tdata(m_axis_tdata) + .m_axis_tready(out_V_TREADY), + .m_axis_tvalid(out_V_TVALID), + .m_axis_tdata(out_V_TDATA) ); endmodule : $MODULE_NAME_AXI_WRAPPER$ diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 43ae8e8233..97c9dd82c6 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -85,8 +85,6 @@ def get_nodeattr_types(self): "gen_top_module": ("s", False, ""), # bias to be applied to outputs of the node "activation_bias": ("i", False, 0), - # used for IPI step - "clkFreq": ("i", False, 200000000), } 
my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -477,10 +475,6 @@ def code_generation_ipi(self): cmd = [] rtl_file_list = self.get_rtl_file_list() code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - node_name = self.onnx_node.name - dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0][0] - din_name = self.get_verilog_top_module_intf_names()["s_axis"][0][0] - clock_freq = self.get_nodeattr("clkFreq") for rtl_file in rtl_file_list: cmd.append( @@ -493,16 +487,6 @@ def code_generation_ipi(self): % (self.get_nodeattr("gen_top_module"), self.onnx_node.name) ) - cmd.append( - "set_property -dict [list CONFIG.FREQ_HZ {%d}] [%s %s/%s]" - % (clock_freq, "get_bd_intf_pins", node_name, din_name) - ) - - cmd.append( - "set_property -dict [list CONFIG.FREQ_HZ {%d}] [%s %s/%s]" - % (clock_freq, "get_bd_intf_pins", node_name, dout_name) - ) - return cmd def get_verilog_top_module_intf_names(self): @@ -517,9 +501,6 @@ def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() intf_names["axilite"] = ["s_axilite"] - intf_names["s_axis"] = [["s_axis"]] - intf_names["m_axis"] = [["m_axis"]] - return intf_names def get_dynamic_config(self, model, address_stride=1): From bdfa6cb97096680247b6648edf20d4c519dcad16 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 31 Jan 2023 11:41:24 +0000 Subject: [PATCH 080/111] [thresholding] remove duplicated test helper function Signed-off-by: Fionn O'Donohoe --- .../test_convert_to_hls_thresholding.py | 54 +------------------ 1 file changed, 2 insertions(+), 52 deletions(-) diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 9486513402..84521b395c 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -46,6 +46,8 @@ from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from 
finn.transformation.fpgadataflow.insert_fifo import InsertFIFO from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from test_fpgadataflow_thresholding_binary_search import make_single_thresholding_binary_search_modelwrapper + test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 @@ -86,58 +88,6 @@ def convert_np_array_to_standard_data_layout(data): return np.transpose(data, (0, 3, 1, 2)) -def make_single_thresholding_binary_search_modelwrapper( - thresholds, - pe, - input_data_type, - output_data_type, - activation_bias, - num_input_vecs, -): - NumChannels = thresholds.shape[0] - - inp = helper.make_tensor_value_info( - "inp", TensorProto.FLOAT, num_input_vecs + [NumChannels] - ) - outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, num_input_vecs + [NumChannels] - ) - - node_inp_list = ["inp", "thresh"] - - Thresholding_node = helper.make_node( - "Thresholding_Binary_Search", - node_inp_list, - ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - NumChannels=NumChannels, - PE=pe, - numSteps=thresholds.shape[1], - inputDataType=input_data_type.name, - weightDataType=input_data_type.name, - outputDataType=output_data_type.name, - numInputVectors=num_input_vecs, - activation_bias=activation_bias, - ) - graph = helper.make_graph( - nodes=[Thresholding_node], - name="thresholding_graph", - inputs=[inp], - outputs=[outp], - ) - - model = helper.make_model(graph, producer_name="thresholding-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", input_data_type) - model.set_tensor_datatype("outp", output_data_type) - - model.set_tensor_datatype("thresh", input_data_type) - model.set_initializer("thresh", thresholds) - return model - - def make_single_multithresholding_modelwrapper( thresholds, pe, From 6809351c5210c87a199e8b4167fa54b2dd9a48c8 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 31 Jan 2023 12:24:41 +0000 Subject: [PATCH 081/111] [thresholding] assert on finding unsupported 
memory mode for thresholding binary search HLS conversion function Signed-off-by: Fionn O'Donohoe --- .../fpgadataflow/convert_to_hls_layers.py | 12 ++++-------- .../fpgadataflow/test_convert_to_hls_thresholding.py | 11 ++--------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index f6dd466fab..1a331b059f 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -1127,14 +1127,10 @@ def apply(self, model): # Perform checks for RTL variant if chosen if self.use_rtl_variant: - # Check memory mode - if self.mem_mode != "decoupled": - warnings.warn( - """%s : RTL Thresholding does not support 'decoupled' memory mode. - Falling back to HLS implementation.""" - % node.name - ) - is_rtl_variant_compatible = False + assert self.mem_mode == "decoupled", ( + """%s : RTL Thresholding only supports 'decoupled' memory mode.""" + % node.name + ) # Check PE/SIMD value if pe != 1: diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 84521b395c..d07ffd2cbf 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -142,22 +142,15 @@ def make_single_multithresholding_modelwrapper( @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) @pytest.mark.parametrize("fold", [-1, 1, 2]) @pytest.mark.parametrize("num_input_channels", [16]) -@pytest.mark.parametrize("mem_mode", ["decoupled", "const"]) @pytest.mark.fpgadataflow @pytest.mark.vivado def test_convert_to_hls_tbs_rtl_variant( - activation, input_data_type, fold, num_input_channels, mem_mode + activation, input_data_type, fold, num_input_channels, ): # Handle inputs to the test pe = generate_pe_value(fold, num_input_channels) 
num_steps = activation.get_num_possible_values() - 1 - # Cppsim is not supported for this node (as it is an RTL node) - if mem_mode == "const": - pytest.skip("const memory mode not supported for this node") - elif mem_mode != "decoupled": - raise Exception("Unknown mem_mode: {}".format(mem_mode)) - if activation == DataType["BIPOLAR"]: pytest.skip( "Only negative activations are supported for " @@ -267,7 +260,7 @@ def write_thresh_config(sim): # Recreate the model using the ConvertToHLS transform new_model = new_model.transform( - to_hls.InferThresholdingLayer(mem_mode=mem_mode, use_rtl_variant=True) + to_hls.InferThresholdingLayer(mem_mode="decoupled", use_rtl_variant=True) ) new_model = new_model.transform(InsertFIFO(True)) new_model = new_model.transform(GiveUniqueNodeNames()) From 4515cf7c6d4e55f8dfca62b52b504e2666a6b497 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 31 Jan 2023 12:29:42 +0000 Subject: [PATCH 082/111] [thresholding] precommit fix Signed-off-by: Fionn O'Donohoe --- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index d07ffd2cbf..75c4ef599c 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -46,7 +46,9 @@ from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -from test_fpgadataflow_thresholding_binary_search import make_single_thresholding_binary_search_modelwrapper +from test_fpgadataflow_thresholding_binary_search import ( + make_single_thresholding_binary_search_modelwrapper, +) test_fpga_part = "xczu3eg-sbva484-1-e" @@ -145,7 +147,10 @@ def make_single_multithresholding_modelwrapper( @pytest.mark.fpgadataflow 
@pytest.mark.vivado def test_convert_to_hls_tbs_rtl_variant( - activation, input_data_type, fold, num_input_channels, + activation, + input_data_type, + fold, + num_input_channels, ): # Handle inputs to the test pe = generate_pe_value(fold, num_input_channels) From b51498ef84edcd9362f4f83270f9ae39f5d7980f Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 31 Jan 2023 12:39:39 +0000 Subject: [PATCH 083/111] [thresholding] precommit fix 2 Signed-off-by: Fionn O'Donohoe --- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 75c4ef599c..09067564eb 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -39,6 +39,9 @@ from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes from qonnx.util.basic import gen_finn_dt_tensor +from test_fpgadataflow_thresholding_binary_search import ( + make_single_thresholding_binary_search_modelwrapper, +) import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls from finn.core.rtlsim_exec import rtlsim_exec @@ -46,10 +49,6 @@ from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -from test_fpgadataflow_thresholding_binary_search import ( - make_single_thresholding_binary_search_modelwrapper, -) - test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 From ff3b2014d5de4bf2a98c321d14bce15a9862bf74 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 31 Jan 2023 12:48:23 +0000 Subject: [PATCH 084/111] [thresholding] precommit fix 3 Signed-off-by: Fionn O'Donohoe --- src/finn/transformation/fpgadataflow/convert_to_hls_layers.py | 3 ++- 1 
file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index 1a331b059f..1bc5fee664 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -1128,7 +1128,8 @@ def apply(self, model): # Perform checks for RTL variant if chosen if self.use_rtl_variant: assert self.mem_mode == "decoupled", ( - """%s : RTL Thresholding only supports 'decoupled' memory mode.""" + """%s : RTL Thresholding only supports 'decoupled' memory + mode.""" % node.name ) From fc7e00db46414b88f3e1c3d3dc9dff4cf6bc84ff Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 23 Mar 2023 14:59:46 +0000 Subject: [PATCH 085/111] [thresholding] adjust templates so that .sv files are modular and can be used as standalone IP Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding.sv | 4 ++-- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 6 +++--- finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +- .../custom_op/fpgadataflow/thresholding_binary_search.py | 6 +----- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index b26747d1ff..c7d5c86f6d 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -43,7 +43,7 @@ * threshold configuration relies on a channel address prefix. Inputs are * accompanied by a channel selector. 
*****************************************************************************/ -module $MODULE_NAME$ #( +module thresholding #( int unsigned N, // output precision int unsigned M, // input/threshold precision int unsigned C, // number of channels @@ -153,4 +153,4 @@ module $MODULE_NAME$ #( assign ocnl = pipe[N].cnl; assign odat = pipe[N].res + BIAS; -endmodule : $MODULE_NAME$ +endmodule : thresholding diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 5cd7746b82..79383c7996 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -38,7 +38,7 @@ * - performs aligned byte address to parameter word address translation. *****************************************************************************/ -module $MODULE_NAME_AXI$ #( +module thresholding_axi #( int unsigned N, // output precision int unsigned M, // input/threshold precision int unsigned C, // Channels @@ -197,7 +197,7 @@ module $MODULE_NAME_AXI$ #( end // Core Thresholding Module - $MODULE_NAME$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core ( + thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core ( .clk, .rst, .twe, .twa, .twd, .en, @@ -205,4 +205,4 @@ module $MODULE_NAME_AXI$ #( .ovld, .ocnl(), .odat ); -endmodule : $MODULE_NAME_AXI$ +endmodule : thresholding_axi diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index c16bf264dd..e46d0046ee 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -84,7 +84,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #( output [((O_BITS+7)/8)*8-1:0] out_V_TDATA ); - $MODULE_NAME_AXI$ #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst ( + thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst ( //- Global Control 
------------------ .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 97c9dd82c6..9b02248185 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -259,11 +259,7 @@ def prepare_codegen_rtl_values(self): their key value(s) in the RTL template files""" code_gen_dict = {} - # Identify the module names - code_gen_dict["$MODULE_NAME$"] = [self.get_verilog_top_module_name()] - code_gen_dict["$MODULE_NAME_AXI$"] = [ - self.get_verilog_top_module_name() + "_axi" - ] + # Identify the module name code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [ self.get_verilog_top_module_name() + "_axi_wrapper" ] From f530aba05b05a59c5cd05b749666f89b82706cba Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 23 Mar 2023 15:50:43 +0000 Subject: [PATCH 086/111] [thresholding]: remove SIGN template in thresholding RTL and create parameter instead for more modular RTL Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding.sv | 18 +++++++++++------- .../thresholding/hdl/thresholding_axi.sv | 3 ++- .../hdl/thresholding_axi_wrapper.v | 3 ++- .../fpgadataflow/thresholding_binary_search.py | 4 ++-- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index c7d5c86f6d..deff4fe0f8 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -48,6 +48,7 @@ module thresholding #( int unsigned M, // input/threshold precision int unsigned C, // number of channels + bit SIGNED, // signed inputs int BIAS, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) int unsigned C_BITS, @@ -68,7 +69,7 @@ module thresholding #( // Input Stream input logic ivld, input logic [C_BITS-1:0] icnl, // Ignored for C == 1 
- input logic $SIGN$ [M -1:0] idat, + input logic [M -1:0] idat, // Output Stream output logic ovld, @@ -80,7 +81,7 @@ module thresholding #( typedef struct packed { logic vld; // Valid data identification logic [C_BITS-1:0] cnl; // Channel - logic $SIGN$ [M -1:0] val; // Original input value + logic [M -1:0] val; // Original input value logic [0:N-1] res; // Assembling result with valid prefix [0:stage] after stage #stage } pipe_t; uwire pipe_t pipe[0:N]; @@ -91,13 +92,13 @@ module thresholding #( for(genvar stage = 0; stage < N; stage++) begin : genStages // Threshold Memory - uwire $SIGN$ [M-1:0] thresh; + uwire [M-1:0] thresh; if(1) begin : blkUpdate // Write control: local select from global address uwire we = twe && tws[stage]; if((C == 1) && (stage == 0)) begin - logic $SIGN$ [M-1:0] Thresh = 'x; + logic [M-1:0] Thresh = 'x; always_ff @(posedge clk) begin if(rst) Thresh <= 'x; else if(we) Thresh <= twd; @@ -105,7 +106,7 @@ module thresholding #( assign thresh = Thresh; end else begin - logic $SIGN$ [M-1:0] Threshs[C * 2**stage]; + logic [M-1:0] Threshs[C * 2**stage]; uwire [$clog2(C)+stage-1:0] wa = twa[$left(twa):N-stage]; uwire [$clog2(C)+stage-1:0] ra; if(C > 1) assign ra[stage+:C_BITS] = pipe[stage].cnl; @@ -117,7 +118,7 @@ module thresholding #( end // Read - logic $SIGN$ [M-1:0] RdReg; + logic [M-1:0] RdReg; always_ff @(posedge clk) begin if(en) RdReg <= Threshs[ra]; end @@ -135,9 +136,12 @@ module thresholding #( // Assemble pipeline data logic [0:N-1] res; + uwire cmp = + SIGNED? 
$signed(thresh) <= $signed(State.val) : + /* else */ $unsigned(thresh) <= $unsigned(State.val); always_comb begin res = State.res; - res[stage] = thresh <= State.val; // Patch in next result bit + res[stage] = cmp; // Patch in next result bit end assign pipe[stage+1] = '{ vld: State.vld, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 79383c7996..6099a64746 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -43,6 +43,7 @@ module thresholding_axi #( int unsigned M, // input/threshold precision int unsigned C, // Channels + bit SIGNED, // signed inputs int BIAS, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) int unsigned O_BITS @@ -197,7 +198,7 @@ module thresholding_axi #( end // Core Thresholding Module - thresholding #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core ( + thresholding #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core ( .clk, .rst, .twe, .twa, .twd, .en, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index e46d0046ee..caf850b5bc 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -36,6 +36,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #( parameter N = $N$, // output precision parameter M = $M$, // input/threshold precision parameter C = $C$, // Channels + parameter SIGNED = $SIGNED$, // signed inputs int BIAS = $BIAS$, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) parameter O_BITS = BIAS > 0? 
@@ -84,7 +85,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #( output [((O_BITS+7)/8)*8-1:0] out_V_TDATA ); - thresholding_axi #(.N(N), .M(M), .C(C), .BIAS(BIAS), .O_BITS(O_BITS)) inst ( + thresholding_axi #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS), .O_BITS(O_BITS)) inst ( //- Global Control ------------------ .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 9b02248185..af9e1173fb 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -286,9 +286,9 @@ def prepare_codegen_rtl_values(self): # Is the input datatype signed or unsigned? # The thresholding core needs to know this when comparing weights to inputs if self.get_input_datatype().signed(): - code_gen_dict["$SIGN$"] = ["signed"] + code_gen_dict["$SIGNED$"] = [str(1)] else: - code_gen_dict["$SIGN$"] = ["unsigned"] + code_gen_dict["$SIGNED$"] = [str(0)] return code_gen_dict From 3cd600cce8e1ff98161c55dce232d703173fa569 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 23 Mar 2023 16:20:49 +0000 Subject: [PATCH 087/111] [thresholding]: decouple thresholding core from axi wrapper by removing input parameters Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding.sv | 6 ++++-- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 8 +++++--- finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index deff4fe0f8..52d0b41b33 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -51,8 +51,10 @@ module thresholding #( bit SIGNED, // signed inputs int BIAS, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) - int unsigned C_BITS, - int unsigned 
O_BITS + localparam int unsigned C_BITS = C < 2? 1 : $clog2(C), + localparam int unsigned O_BITS = BIAS >= 0? + /* unsigned */ $clog2(2**N+BIAS) : + /* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS) )( // Global Control input logic clk, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 6099a64746..4bb3add13b 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -46,7 +46,10 @@ module thresholding_axi #( bit SIGNED, // signed inputs int BIAS, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) - int unsigned O_BITS + localparam int unsigned C_BITS = C < 2? 1 : $clog2(C), + localparam int unsigned O_BITS = BIAS >= 0? + /* unsigned */ $clog2(2**N+BIAS) : + /* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS) )( //- Global Control ------------------ input logic ap_clk, @@ -173,7 +176,6 @@ module thresholding_axi #( end : blkOutputDecouple - localparam int unsigned C_BITS = C < 2? 
1 : $clog2(C); uwire ivld = s_axis_tvalid; uwire [C_BITS-1:0] icnl; uwire [M -1:0] idat = s_axis_tdata[M-1:0]; @@ -198,7 +200,7 @@ module thresholding_axi #( end // Core Thresholding Module - thresholding #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS), .O_BITS(O_BITS), .C_BITS(C_BITS)) core ( + thresholding #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS)) core ( .clk, .rst, .twe, .twa, .twd, .en, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index caf850b5bc..da013b667a 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -85,7 +85,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #( output [((O_BITS+7)/8)*8-1:0] out_V_TDATA ); - thresholding_axi #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS), .O_BITS(O_BITS)) inst ( + thresholding_axi #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS)) inst ( //- Global Control ------------------ .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), From 54afa637d2b7beac8beca99979e2d727385b90f3 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Tue, 28 Mar 2023 17:17:13 +0100 Subject: [PATCH 088/111] [thresholding]: patch in PE value to the thresholding AXI module and wrapper Signed-off-by: Fionn O'Donohoe --- .../thresholding/hdl/thresholding_axi.sv | 117 ++++++++++++------ .../hdl/thresholding_axi_wrapper.v | 7 +- .../thresholding_binary_search.py | 2 + 3 files changed, 82 insertions(+), 44 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 4bb3add13b..506e31b215 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -42,11 +42,14 @@ module thresholding_axi #( int unsigned N, // output precision int unsigned M, // input/threshold precision int unsigned C, // Channels + int unsigned PE, // Processing Parallelism, requires C = M*PE bit 
SIGNED, // signed inputs int BIAS, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) - localparam int unsigned C_BITS = C < 2? 1 : $clog2(C), + localparam int unsigned CF = 1 + (C-1)/PE, // Channel Fold + localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, + localparam int unsigned C_BITS = C/PE < 2? 1 : $clog2(C/PE), localparam int unsigned O_BITS = BIAS >= 0? /* unsigned */ $clog2(2**N+BIAS) : /* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS) @@ -57,9 +60,9 @@ module thresholding_axi #( //- AXI Lite ------------------------ // Writing - input logic s_axilite_AWVALID, - output logic s_axilite_AWREADY, - input logic [$clog2(C)+N+1:0] s_axilite_AWADDR, // lowest 2 bits (byte selectors) are ignored + input logic s_axilite_AWVALID, + output logic s_axilite_AWREADY, + input logic [ADDR_BITS-1:0] s_axilite_AWADDR, // lowest 2 bits (byte selectors) are ignored input logic s_axilite_WVALID, output logic s_axilite_WREADY, @@ -83,33 +86,53 @@ module thresholding_axi #( //- AXI Stream - Input -------------- output logic s_axis_tready, input logic s_axis_tvalid, - input logic [((M+7)/8)*8-1:0] s_axis_tdata, + input logic [((PE*M+7)/8)*8-1:0] s_axis_tdata, //- AXI Stream - Output ------------- input logic m_axis_tready, output logic m_axis_tvalid, - output logic [((O_BITS+7)/8)*8-1:0] m_axis_tdata + output logic [((PE*O_BITS+7)/8)*8-1:0] m_axis_tdata ); + //- Parameter Constraints Checking -------------------------------------- + initial begin + if(C%PE != 0) begin + $error("%m: Channel count C=%0d is not a multiple of PE=%0d.", C, PE); + $finish; + end + end + //- Global Control ------------------------------------------------------ uwire clk = ap_clk; uwire rst = !ap_rst_n; //- AXI Lite: Threshold Configuration ----------------------------------- - uwire twe; - uwire [$clog2(C)+N-1:0] twa; - uwire [ M-1:0] twd; + uwire twe[PE]; + uwire [$clog2(CF)+N-1:0] twa; + uwire [ M-1:0] twd; if(1) begin : blkAxiLite logic WABusy = 0; logic WDBusy = 
0; - logic [$clog2(C)+N-1:0] Addr = 'x; - logic [ M-1:0] Data = 'x; + logic Sel[PE] = '{ default: 'x }; + logic [$clog2(CF)+N-1:0] Addr = 'x; + logic [ M-1:0] Data = 'x; - assign twe = WABusy && WDBusy; + for(genvar pe = 0; pe < PE; pe++) begin + assign twe[pe] = WABusy && WDBusy && Sel[pe]; + end assign twa = Addr; assign twd = Data; - uwire clr_wr = rst || (twe && s_axilite_BREADY); - always_ff @(posedge clk) begin : blockName + if(PE == 1) always_comb Sel[0] = 1; + else begin + always_ff @(posedge clk) begin + if(!WABusy) begin + foreach(Sel[pe]) Sel[pe] <= s_axilite_AWADDR[N+2+:$clog2(PE)] == pe; + end + end + end + + uwire clr_wr = rst || (WABusy && WDBusy && s_axilite_BREADY); + always_ff @(posedge clk) begin if(clr_wr) begin WABusy <= 0; Addr <= 'x; @@ -119,7 +142,8 @@ module thresholding_axi #( else begin if(!WABusy) begin WABusy <= s_axilite_AWVALID; - Addr <= s_axilite_AWADDR[$clog2(C)+N+1:2]; + Addr[0+:N] <= s_axilite_AWADDR[2+:N]; + if(C > 1) Addr[N+:$clog2(CF)] <= s_axilite_AWADDR[2+N+$clog2(PE)+:$clog2(CF)]; end if(!WDBusy) begin WDBusy <= s_axilite_WVALID; @@ -148,39 +172,48 @@ module thresholding_axi #( //- IO-Sandwich with two-stage output buffer for containing a local enable uwire en; - uwire [O_BITS-1:0] odat; - uwire ovld; + uwire [PE-1:0][O_BITS-1:0] odat; + uwire ovld[PE]; if(1) begin : blkOutputDecouple typedef struct { - logic vld; - logic [O_BITS-1:0] dat; + logic vld; + logic [PE-1:0][O_BITS-1:0] dat; } buf_t; - buf_t Buf[2] = '{ default: '{ vld: 0, dat: 'x } }; + buf_t A = '{ vld: 0, dat: 'x }; + buf_t B = '{ vld: 0, dat: 'x }; always_ff @(posedge clk) begin - if(rst) Buf <= '{ default: '{ vld: 0, dat: 'x } }; + if(rst) begin + A <= '{ vld: 0, dat: 'x }; + B <= '{ vld: 0, dat: 'x }; + end else begin - if(!Buf[1].vld || m_axis_tready) begin - Buf[1] <= '{ - vld: Buf[0].vld || ovld, - dat: Buf[0].vld? Buf[0].dat : odat + if(!B.vld || m_axis_tready) begin + B <= '{ + vld: A.vld || ovld[0], + dat: A.vld? 
A.dat : odat }; end - Buf[0].vld <= Buf[1].vld && !m_axis_tready && (Buf[0].vld || ovld); - if(!Buf[0].vld) Buf[0].dat <= odat; + A.vld <= B.vld && !m_axis_tready && (A.vld || ovld[0]); + if(!A.vld) A.dat <= odat; end end - assign en = !Buf[0].vld; + assign en = !A.vld; - assign m_axis_tvalid = Buf[1].vld; - assign m_axis_tdata = Buf[1].dat; + assign m_axis_tvalid = B.vld; + assign m_axis_tdata = B.dat; end : blkOutputDecouple + // localparam int unsigned C_BITS = C/PE < 2? 1 : $clog2(C/PE); uwire ivld = s_axis_tvalid; uwire [C_BITS-1:0] icnl; - uwire [M -1:0] idat = s_axis_tdata[M-1:0]; + uwire [M -1:0] idat[PE]; + for(genvar pe = 0; pe < PE; pe++) begin + assign idat[pe] = s_axis_tdata[pe*M+:M]; + end + assign s_axis_tready = en; - if(C == 1) assign icnl = 'x; + if(C == PE) assign icnl = 'x; else begin logic [C_BITS-1:0] Chnl = 0; logic Last = 0; @@ -193,19 +226,21 @@ module thresholding_axi #( end else if(inc) begin Chnl <= Chnl + 1; - Last <= (~Chnl & (C-2)) == 0; + Last <= (~Chnl & (C/PE-2)) == 0; end end assign icnl = Chnl; end - // Core Thresholding Module - thresholding #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS)) core ( - .clk, .rst, - .twe, .twa, .twd, - .en, - .ivld, .icnl, .idat, - .ovld, .ocnl(), .odat - ); + // Core Thresholding Modules + for(genvar pe = 0; pe < PE; pe++) begin : genCores + thresholding #(.N(N), .M(M), .C(C/PE), .SIGNED(SIGNED), .BIAS(BIAS)) core ( + .clk, .rst, + .twe(twe[pe]), .twa, .twd, + .en, + .ivld, .icnl, .idat(idat[pe]), + .ovld(ovld[pe]), .ocnl(), .odat(odat[pe]) + ); + end : genCores endmodule : thresholding_axi diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index da013b667a..c27480f388 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -38,6 +38,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #( parameter C = $C$, // Channels parameter SIGNED = $SIGNED$, // signed 
inputs int BIAS = $BIAS$, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) + parameter PE = $PE$, parameter O_BITS = BIAS > 0? /* unsigned */ $clog2(2**N-BIAS) : @@ -77,15 +78,15 @@ module $MODULE_NAME_AXI_WRAPPER$ #( //- AXI Stream - Input -------------- output in0_V_TREADY, input in0_V_TVALID, - input [((M+7)/8)*8-1:0] in0_V_TDATA, + input [((PE*M+7)/8)*8-1:0] in0_V_TDATA, //- AXI Stream - Output ------------- input out_V_TREADY, output out_V_TVALID, - output [((O_BITS+7)/8)*8-1:0] out_V_TDATA + output [((PE*O_BITS+7)/8)*8-1:0] out_V_TDATA ); - thresholding_axi #(.N(N), .M(M), .C(C), .SIGNED(SIGNED), .BIAS(BIAS)) inst ( + thresholding_axi #(.N(N), .M(M), .C(C), .PE(PE), .SIGNED(SIGNED), .BIAS(BIAS)) inst ( //- Global Control ------------------ .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index af9e1173fb..e2453fcaad 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -273,6 +273,7 @@ def prepare_codegen_rtl_values(self): ) # input/threshold precision num_channels = self.get_nodeattr("NumChannels") # number of channels bias = self.get_nodeattr("activation_bias") # activation bias value + pe = self.get_nodeattr("PE") code_gen_dict["$N$"] = [ str(DataType[output_data_type].bitwidth()) @@ -282,6 +283,7 @@ def prepare_codegen_rtl_values(self): ] # input/threshold precision - convert bitwidth to string code_gen_dict["$C$"] = [str(num_channels)] # number of channels code_gen_dict["$BIAS$"] = [str(bias)] # activation bias value + code_gen_dict["$PE$"] = [str(pe)] # requires C = M*PE # Is the input datatype signed or unsigned? 
# The thresholding core needs to know this when comparing weights to inputs From 29f9e1ce637d5df00cb1dae2ada3438070da0852 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Thu, 30 Mar 2023 15:43:36 +0100 Subject: [PATCH 089/111] [thresholding]: remove reset that erases the 0th stage threshold value There is a corner case that arises when the thresholding core is configured with a single channel and that concerns the stage 0 threshold parameter. For every other stage in this case (and in all other cases) the threshold parameters are non-volatile. When a reset happens after programming the threshold parameters, all of them would still be intact except for the 0th stage threshold value. Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding.sv | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 52d0b41b33..0ce95ed3f9 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -102,8 +102,7 @@ module thresholding #( if((C == 1) && (stage == 0)) begin logic [M-1:0] Thresh = 'x; always_ff @(posedge clk) begin - if(rst) Thresh <= 'x; - else if(we) Thresh <= twd; + if(we) Thresh <= twd; end assign thresh = Thresh; end From 2c4c8e224f8921848713f6d121532ff345c84fd0 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 31 Mar 2023 10:43:00 +0100 Subject: [PATCH 090/111] [thresholding]: enable PE testing of RTL thresholding binary search node Signed-off-by: Fionn O'Donohoe --- .../fpgadataflow/convert_to_hls_layers.py | 10 ---------- .../fpgadataflow/test_convert_to_hls_thresholding.py | 10 ++-------- .../test_fpgadataflow_thresholding_binary_search.py | 11 ++--------- 3 files changed, 4 insertions(+), 27 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index dedcc30a38..4c06a28b75 100644 ---
a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -1137,16 +1137,6 @@ def apply(self, model): % node.name ) - # Check PE/SIMD value - if pe != 1: - warnings.warn( - """%s : RTL Thresholding does not support paralellisation. - Only a PE value of 1 is supported. - Falling back to HLS implementation.""" - % node.name - ) - is_rtl_variant_compatible = False - if self.use_rtl_variant and is_rtl_variant_compatible: new_node = helper.make_node( "Thresholding_Binary_Search", diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 09067564eb..895c82d4ca 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -138,10 +138,10 @@ def make_single_multithresholding_modelwrapper( model.set_initializer("thresh", thresholds) return model - +# N.B. Fold values where C % PE != 0 fail @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]]) @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) -@pytest.mark.parametrize("fold", [-1, 1, 2]) +@pytest.mark.parametrize("fold", [-1, 1, 2, 4, 6]) @pytest.mark.parametrize("num_input_channels", [16]) @pytest.mark.fpgadataflow @pytest.mark.vivado @@ -161,12 +161,6 @@ def test_convert_to_hls_tbs_rtl_variant( "RTL Thresholding Binary Search node" ) - # Paralellisation not supported for thresholding binary search rtl node - if pe != 1: - pytest.skip( - "Paralellisation not supported for RTL Thresholding Binary Search node" - ) - # Other non-input parameters num_input_vecs = [1, 2, 2] output_data_type = activation diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py index e57c4942c8..24b60f5ea5 100755 --- 
a/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_binary_search.py @@ -186,10 +186,10 @@ def test_fpgadataflow_thresholding_binary_search_prepare_rtlsim(): # Test brief: Create a Thresholding binary search layer using various parameters # and test against a SW generated & simulated dataset -# N.B. - fold factor of '-1' is supported only (no PE/SIMD support) +# N.B. Fold values where C % PE != 0 fail @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]]) @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) -@pytest.mark.parametrize("fold", [-1, 1, 2]) +@pytest.mark.parametrize("fold", [-1, 1, 2, 4, 6]) @pytest.mark.parametrize("num_input_channels", [16]) @pytest.mark.fpgadataflow @pytest.mark.vivado @@ -201,13 +201,6 @@ def test_fpgadataflow_thresholding_binary_search( pe = generate_pe_value(fold, num_input_channels) num_steps = activation.get_num_possible_values() - 1 - # Paralellisation not supported for thresholding binary search rtl node - if pe != 1: - pytest.skip( - "Paralellisation of IP not supported for " - "RTL Thresholding Binary Search node" - ) - # Other non-input parameters num_input_vecs = [1, 2, 2] output_data_type = activation From 5d07a435c2994f0238fb41ec21381d75ea049796 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 31 Mar 2023 10:45:34 +0100 Subject: [PATCH 091/111] [thresholding]: add comment about why bipolar activations skipped for thresholding binary search node Signed-off-by: Fionn O'Donohoe --- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index 895c82d4ca..f2d76c8416 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -155,6 +155,9 @@ def
test_convert_to_hls_tbs_rtl_variant( pe = generate_pe_value(fold, num_input_channels) num_steps = activation.get_num_possible_values() - 1 + # See convert_to_hls_layers::InferThresholdingLayer: + # assert (not odt.signed()) or (actval < 0) + # This implies that it expects a negative activation, BIPOLAR does not provide that if activation == DataType["BIPOLAR"]: pytest.skip( "Only negative activations are supported for " From fcf579ce01075bbeb997580fbafc8cd9d64ed50c Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Fri, 31 Mar 2023 11:32:42 +0100 Subject: [PATCH 092/111] fix precommit issues Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 2 +- tests/fpgadataflow/test_convert_to_hls_thresholding.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index e2453fcaad..694d25bfaa 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -283,7 +283,7 @@ def prepare_codegen_rtl_values(self): ] # input/threshold precision - convert bitwidth to string code_gen_dict["$C$"] = [str(num_channels)] # number of channels code_gen_dict["$BIAS$"] = [str(bias)] # activation bias value - code_gen_dict["$PE$"] = [str(pe)] # requires C = M*PE + code_gen_dict["$PE$"] = [str(pe)] # requires C = M*PE # Is the input datatype signed or unsigned? 
# The thresholding core needs to know this when comparing weights to inputs diff --git a/tests/fpgadataflow/test_convert_to_hls_thresholding.py b/tests/fpgadataflow/test_convert_to_hls_thresholding.py index f2d76c8416..9c233bdd06 100755 --- a/tests/fpgadataflow/test_convert_to_hls_thresholding.py +++ b/tests/fpgadataflow/test_convert_to_hls_thresholding.py @@ -138,6 +138,7 @@ def make_single_multithresholding_modelwrapper( model.set_initializer("thresh", thresholds) return model + # N.B. Fold values where C % PE != 0 fail @pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]]) @pytest.mark.parametrize("input_data_type", [DataType["INT16"], DataType["UINT16"]]) From 6c9d1f50177de5bb1c91eacc061d0aa8adb9cf56 Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 5 Apr 2023 16:28:55 +0100 Subject: [PATCH 093/111] [thresholding] only adjust MSB thresholding addressing bits when channel fold factor is present In the case where channel fold is not present (i.e. CF is 1), we saw incorrect threshold address programming. Without this commit and when no channel folding is present, the if statement was always stepped through and damaged the LSBs of the Addr signal, causing incorrect threshold address programming for a PE core. Although the logic in the if statement looks correct (i.e. programming 0 bits (clog2(CF) => 0)) and should not harm the Addr signal, it's best to avoid stepping through a case that does not exist (i.e., there is no channel folding and each channel has its own PE; therefore no extra bits are needed to program multiple channel thresholds into a single PE core).
Signed-off-by: Fionn O'Donohoe --- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 506e31b215..d2a7420a99 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -143,7 +143,7 @@ module thresholding_axi #( if(!WABusy) begin WABusy <= s_axilite_AWVALID; Addr[0+:N] <= s_axilite_AWADDR[2+:N]; - if(C > 1) Addr[N+:$clog2(CF)] <= s_axilite_AWADDR[2+N+$clog2(PE)+:$clog2(CF)]; + if(CF > 1) Addr[N+:$clog2(CF)] <= s_axilite_AWADDR[2+N+$clog2(PE)+:$clog2(CF)]; end if(!WDBusy) begin WDBusy <= s_axilite_WVALID; From b247ffbc258ec628a51c14822ec4343283ef5a2e Mon Sep 17 00:00:00 2001 From: Fionn O'Donohoe Date: Wed, 5 Apr 2023 19:44:36 +0100 Subject: [PATCH 094/111] [thresholding] update binary search to match qonnx 0.2.0 commit 65822357a7dba4f917c852d5f08bdebc7dd22e9d on dev moved all custom_ops to be compatible with qonnx 0.2.0 Signed-off-by: Fionn O'Donohoe --- src/finn/custom_op/fpgadataflow/thresholding_binary_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py index 694d25bfaa..d02b778823 100755 --- a/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_binary_search.py @@ -60,8 +60,8 @@ class Thresholding_Binary_Search(HLSCustomOp): """Class that corresponds to finn-rtllib 'thresholding' function.""" - def __init__(self, onnx_node): - super().__init__(onnx_node) + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) def get_nodeattr_types(self): my_attrs = { From 7be5ce412e5747f17fe0062769cd2cc476b5bfa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Mon, 17 Apr 2023 
07:53:44 +0100 Subject: [PATCH 095/111] Defaulting BIAS and SIGNED parameters. Renaming M to K avoiding naming collision with uniform option. --- finn-rtllib/thresholding/hdl/thresholding.sv | 28 +++++++++---------- .../thresholding/hdl/thresholding_axi.sv | 27 +++++++++--------- .../hdl/thresholding_axi_wrapper.v | 18 ++++++------ 3 files changed, 36 insertions(+), 37 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 0ce95ed3f9..d16a9219d7 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -45,11 +45,11 @@ *****************************************************************************/ module thresholding #( int unsigned N, // output precision - int unsigned M, // input/threshold precision + int unsigned K, // input/threshold precision int unsigned C, // number of channels - bit SIGNED, // signed inputs - int BIAS, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) + bit SIGNED = 1, // signed inputs + int BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] localparam int unsigned C_BITS = C < 2? 1 : $clog2(C), localparam int unsigned O_BITS = BIAS >= 0? 
@@ -63,15 +63,15 @@ module thresholding #( // Threshold Configuration input logic twe, input logic [$clog2(C)+N-1:0] twa, - input logic [ M-1:0] twd, + input logic [ K-1:0] twd, // Clock Enable for Stream Processing input logic en, // Input Stream input logic ivld, - input logic [C_BITS-1:0] icnl, // Ignored for C == 1 - input logic [M -1:0] idat, + input logic [C_BITS-1:0] icnl, // Ignored for C == 1 + input logic [K -1:0] idat, // Output Stream output logic ovld, @@ -81,10 +81,10 @@ module thresholding #( // Pipeline Links & Feed typedef struct packed { - logic vld; // Valid data identification - logic [C_BITS-1:0] cnl; // Channel - logic [M -1:0] val; // Original input value - logic [0:N-1] res; // Assembling result with valid prefix [0:stage] after stage #stage + logic vld; // Valid data identification + logic [C_BITS-1:0] cnl; // Channel + logic [K -1:0] val; // Original input value + logic [0:N-1] res; // Assembling result with valid prefix [0:stage] after stage #stage } pipe_t; uwire pipe_t pipe[0:N]; assign pipe[0] = pipe_t'{ vld: ivld, cnl: icnl, val: idat, res: {N{1'bx}} }; // Feed original input @@ -94,20 +94,20 @@ module thresholding #( for(genvar stage = 0; stage < N; stage++) begin : genStages // Threshold Memory - uwire [M-1:0] thresh; + uwire [K-1:0] thresh; if(1) begin : blkUpdate // Write control: local select from global address uwire we = twe && tws[stage]; if((C == 1) && (stage == 0)) begin - logic [M-1:0] Thresh = 'x; + logic [K-1:0] Thresh = 'x; always_ff @(posedge clk) begin if(we) Thresh <= twd; end assign thresh = Thresh; end else begin - logic [M-1:0] Threshs[C * 2**stage]; + logic [K-1:0] Threshs[C * 2**stage]; uwire [$clog2(C)+stage-1:0] wa = twa[$left(twa):N-stage]; uwire [$clog2(C)+stage-1:0] ra; if(C > 1) assign ra[stage+:C_BITS] = pipe[stage].cnl; @@ -119,7 +119,7 @@ module thresholding #( end // Read - logic [M-1:0] RdReg; + logic [K-1:0] RdReg; always_ff @(posedge clk) begin if(en) RdReg <= Threshs[ra]; end diff --git 
a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index d2a7420a99..2f0393a3e7 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -40,16 +40,15 @@ module thresholding_axi #( int unsigned N, // output precision - int unsigned M, // input/threshold precision + int unsigned K, // input/threshold precision int unsigned C, // Channels - int unsigned PE, // Processing Parallelism, requires C = M*PE + int unsigned PE, // Processing Parallelism, requires C = k*PE - bit SIGNED, // signed inputs - int BIAS, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) + bit SIGNED = 1, // signed inputs + int BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] - localparam int unsigned CF = 1 + (C-1)/PE, // Channel Fold + localparam int unsigned CF = 1 + (C-1)/PE, // Channel Fold localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, - localparam int unsigned C_BITS = C/PE < 2? 1 : $clog2(C/PE), localparam int unsigned O_BITS = BIAS >= 0? /* unsigned */ $clog2(2**N+BIAS) : /* signed */ 1+$clog2(-BIAS >= 2**(N-1)? 
-BIAS : 2**N+BIAS) @@ -86,7 +85,7 @@ module thresholding_axi #( //- AXI Stream - Input -------------- output logic s_axis_tready, input logic s_axis_tvalid, - input logic [((PE*M+7)/8)*8-1:0] s_axis_tdata, + input logic [((PE*K+7)/8)*8-1:0] s_axis_tdata, //- AXI Stream - Output ------------- input logic m_axis_tready, @@ -108,13 +107,13 @@ module thresholding_axi #( //- AXI Lite: Threshold Configuration ----------------------------------- uwire twe[PE]; uwire [$clog2(CF)+N-1:0] twa; - uwire [ M-1:0] twd; + uwire [ K-1:0] twd; if(1) begin : blkAxiLite logic WABusy = 0; logic WDBusy = 0; logic Sel[PE] = '{ default: 'x }; logic [$clog2(CF)+N-1:0] Addr = 'x; - logic [ M-1:0] Data = 'x; + logic [ K-1:0] Data = 'x; for(genvar pe = 0; pe < PE; pe++) begin assign twe[pe] = WABusy && WDBusy && Sel[pe]; @@ -147,7 +146,7 @@ module thresholding_axi #( end if(!WDBusy) begin WDBusy <= s_axilite_WVALID; - Data <= s_axilite_WDATA[M-1:0]; + Data <= s_axilite_WDATA[K-1:0]; end end end @@ -204,12 +203,12 @@ module thresholding_axi #( end : blkOutputDecouple - // localparam int unsigned C_BITS = C/PE < 2? 1 : $clog2(C/PE); + localparam int unsigned C_BITS = C/PE < 2? 
1 : $clog2(C/PE); uwire ivld = s_axis_tvalid; uwire [C_BITS-1:0] icnl; - uwire [M -1:0] idat[PE]; + uwire [K -1:0] idat[PE]; for(genvar pe = 0; pe < PE; pe++) begin - assign idat[pe] = s_axis_tdata[pe*M+:M]; + assign idat[pe] = s_axis_tdata[pe*K+:K]; end assign s_axis_tready = en; @@ -234,7 +233,7 @@ module thresholding_axi #( // Core Thresholding Modules for(genvar pe = 0; pe < PE; pe++) begin : genCores - thresholding #(.N(N), .M(M), .C(C/PE), .SIGNED(SIGNED), .BIAS(BIAS)) core ( + thresholding #(.N(N), .K(K), .C(C/PE), .SIGNED(SIGNED), .BIAS(BIAS)) core ( .clk, .rst, .twe(twe[pe]), .twa, .twd, .en, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v index c27480f388..2657b39d98 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -34,20 +34,20 @@ module $MODULE_NAME_AXI_WRAPPER$ #( parameter N = $N$, // output precision - parameter M = $M$, // input/threshold precision + parameter K = $M$, // input/threshold precision parameter C = $C$, // Channels - parameter SIGNED = $SIGNED$, // signed inputs - int BIAS = $BIAS$, // offsetting the output [0, 2^N-1) -> [-BIAS, 2^N-1 - BIAS) parameter PE = $PE$, + parameter SIGNED = $SIGNED$, // signed inputs + parameter BIAS = $BIAS$, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) parameter O_BITS = BIAS > 0? - /* unsigned */ $clog2(2**N-BIAS) : - /* signed */ 1+$clog2(BIAS >= 2**(N-1)? BIAS : 2**N-BIAS) + /* unsigned */ $clog2(2**N+BIAS) : + /* signed */ 1+$clog2(-BIAS >= 2**(N-1)? 
-BIAS : 2**N+BIAS) )( //- Global Control ------------------ - (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) + (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) input ap_clk, - (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) + (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) input ap_rst_n, //- AXI Lite ------------------------ @@ -78,7 +78,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #( //- AXI Stream - Input -------------- output in0_V_TREADY, input in0_V_TVALID, - input [((PE*M+7)/8)*8-1:0] in0_V_TDATA, + input [((PE*K+7)/8)*8-1:0] in0_V_TDATA, //- AXI Stream - Output ------------- input out_V_TREADY, @@ -86,7 +86,7 @@ module $MODULE_NAME_AXI_WRAPPER$ #( output [((PE*O_BITS+7)/8)*8-1:0] out_V_TDATA ); - thresholding_axi #(.N(N), .M(M), .C(C), .PE(PE), .SIGNED(SIGNED), .BIAS(BIAS)) inst ( + thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .BIAS(BIAS)) inst ( //- Global Control ------------------ .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), From a0120f2501eddd6b71f7aff36f1f3092e229346d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Thu, 4 May 2023 09:58:42 +0100 Subject: [PATCH 096/111] Thresholding rework to support parameter readback. 
--- finn-rtllib/thresholding/hdl/axilite_if.v | 210 ++++++++++++ finn-rtllib/thresholding/hdl/thresholding.sv | 283 ++++++++++++---- .../thresholding/hdl/thresholding_axi.sv | 200 +++-------- .../hdl/thresholding_axi_wrapper.v | 130 -------- finn-rtllib/thresholding/sim/thresholding.tcl | 17 + .../thresholding/sim/thresholding_axi_tb.sv | 314 ++++++++++++++++++ .../thresholding/sim/thresholding_tb.sv | 272 +++++++++++++++ 7 files changed, 1067 insertions(+), 359 deletions(-) create mode 100644 finn-rtllib/thresholding/hdl/axilite_if.v delete mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v create mode 100644 finn-rtllib/thresholding/sim/thresholding.tcl create mode 100644 finn-rtllib/thresholding/sim/thresholding_axi_tb.sv create mode 100644 finn-rtllib/thresholding/sim/thresholding_tb.sv diff --git a/finn-rtllib/thresholding/hdl/axilite_if.v b/finn-rtllib/thresholding/hdl/axilite_if.v new file mode 100644 index 0000000000..bdd4de288e --- /dev/null +++ b/finn-rtllib/thresholding/hdl/axilite_if.v @@ -0,0 +1,210 @@ +/* + Copyright (c) 2020, Xilinx + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of FINN nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +module axi4lite_if +#( + parameter ADDR_WIDTH = 32, + parameter DATA_WIDTH = 32,//AXI4 spec requires this to be strictly 32 or 64 + parameter IP_DATA_WIDTH = 64//can be any power-of-2 multiple of DATA_WIDTH +) +( +//system signals +input aclk, +input aresetn,//active low, asynchronous assertion and synchronous deassertion + +//Write channels +//write address +output reg awready, +input awvalid, +input [ADDR_WIDTH-1:0] awaddr, +input [2:0] awprot, +//write data +output reg wready, +input wvalid, +input [DATA_WIDTH-1:0] wdata, +input [(DATA_WIDTH/8)-1:0] wstrb, +//burst response +input bready, +output reg bvalid, +output reg [1:0] bresp,//NOTE: 00 = OKAY, 10 = SLVERR (write error) + +//Read channels +//read address +output reg arready, +input arvalid, +input [ADDR_WIDTH-1:0] araddr, +input [2:0] arprot, +//read data +input rready, +output reg rvalid, +output reg [1:0] rresp,//NOTE: 00 = OKAY, 10 = SLVERR (read error) +output reg [DATA_WIDTH-1:0] rdata, + +//IP-side interface +output reg ip_en, +output reg ip_wen, +output reg [ADDR_WIDTH-1:0] ip_addr, +output [IP_DATA_WIDTH-1:0] ip_wdata, +input ip_rack, +input [IP_DATA_WIDTH-1:0] ip_rdata +); + +localparam RESP_OKAY = 2'b00; +localparam 
RESP_SLVERR = 2'b10; +//get ceil(log2(ceil(IP_DATA_WIDTH/DATA_WIDTH))) +localparam NFOLDS_LOG = $clog2((IP_DATA_WIDTH + DATA_WIDTH - 1) / DATA_WIDTH); + +reg internal_ren; +reg internal_wen; +reg internal_wack; +reg [ADDR_WIDTH-1:0] internal_raddr; +reg [ADDR_WIDTH-1:0] internal_waddr; +reg [DATA_WIDTH-1:0] internal_wdata; +wire [DATA_WIDTH-1:0] internal_rdata; +reg internal_error = 0; + +//check DATA_WIDTH +initial begin + if(DATA_WIDTH != 32 & DATA_WIDTH != 64) begin + $display("AXI4Lite DATA_WIDTH must be 32 or 64"); + $finish; + end +end + +//transaction state machine +localparam STATE_IDLE = 0, + STATE_READ = 1, + STATE_WRITE = 2; + +reg [1:0] state; + +always @(posedge aclk or negedge aresetn) + if(~aresetn) + state <= STATE_IDLE; + else case(state) + STATE_IDLE: + if(awvalid & wvalid) + state <= STATE_WRITE; + else if(arvalid) + state <= STATE_READ; + STATE_READ: + if(rvalid & rready) + state <= STATE_IDLE; + STATE_WRITE: + if(bvalid & bready) + state <= STATE_IDLE; + default: state <= STATE_IDLE; + endcase + +//write-related internal signals +always @(*) begin + internal_waddr = awaddr >> $clog2(DATA_WIDTH/8); + internal_wdata = wdata; + internal_wen = (state == STATE_IDLE) & awvalid & wvalid; +end + +always @(posedge aclk) begin + awready <= internal_wen; + wready <= internal_wen; +end + +//read-related internal signals +always @(*) begin + internal_raddr = araddr >> $clog2(DATA_WIDTH/8); + internal_ren = (state == STATE_IDLE) & ~internal_wen & arvalid; +end + +always @(posedge aclk) + arready <= internal_ren; + +wire write_to_last_fold; + +always @(posedge aclk) begin + ip_wen <= write_to_last_fold; + ip_en <= internal_ren | write_to_last_fold; + if(internal_ren | write_to_last_fold) + ip_addr <= internal_ren ? 
(internal_raddr >> NFOLDS_LOG) : (internal_waddr >> NFOLDS_LOG); + internal_wack <= internal_wen; +end + +genvar i; +reg [(1<> (internal_rfold*DATA_WIDTH); + always @(posedge aclk) + if(internal_ren) + internal_rfold <= internal_raddr[NFOLDS_LOG-1:0]; + for(i=0; i<(1< [BIAS, 2^N-1 + BIAS] - localparam int unsigned C_BITS = C < 2? 1 : $clog2(C), + localparam int unsigned CF = C/PE, // Channel fold localparam int unsigned O_BITS = BIAS >= 0? /* unsigned */ $clog2(2**N+BIAS) : /* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS) @@ -61,101 +63,238 @@ module thresholding #( input logic rst, // Threshold Configuration - input logic twe, - input logic [$clog2(C)+N-1:0] twa, - input logic [ K-1:0] twd, - - // Clock Enable for Stream Processing - input logic en, + input logic cfg_en, + input logic cfg_we, + input logic [$clog2(CF)+$clog2(PE)+N-1:0] cfg_a, + input logic [K-1:0] cfg_d, + output logic cfg_rack, + output logic [K-1:0] cfg_q, // Input Stream + output logic irdy, input logic ivld, - input logic [C_BITS-1:0] icnl, // Ignored for C == 1 - input logic [K -1:0] idat, + input logic [PE-1:0][K-1:0] idat, // Output Stream + input logic ordy, output logic ovld, - output logic [C_BITS-1:0] ocnl, - output logic [O_BITS-1:0] odat + output logic [PE-1:0][O_BITS-1:0] odat ); - // Pipeline Links & Feed + // Parameter Constraints Checking + initial begin + if(CF*PE != C) begin + $error("Parallelism PE=%0d is not a multiple of channel count C=%0d.", PE, C); + $finish; + end + end + + // Operations within Pipeline + typedef enum logic [1:0] { + NOP = 2'b00, // No operation + TH = 2'b01, // Thresholding + WR = 2'b11, // Write (initialization) + RB = 2'b10, // Readback (validation) + CFG = 2'b1x // Config op (pointer-preserving) + } op_e; + + // Pipeline Link Type + typedef logic [$clog2(CF)+N-1:0] ptr_t; + typedef logic [K -1:0] val_t; typedef struct packed { - logic vld; // Valid data identification - logic [C_BITS-1:0] cnl; // Channel - logic [K -1:0] val; // Original 
input value - logic [0:N-1] res; // Assembling result with valid prefix [0:stage] after stage #stage + op_e op; + ptr_t ptr; // WR/RB: address; TH: result + val_t val; // WR/RB: threshold value; TH: input value } pipe_t; - uwire pipe_t pipe[0:N]; - assign pipe[0] = pipe_t'{ vld: ivld, cnl: icnl, val: idat, res: {N{1'bx}} }; // Feed original input - // Stages: 0, 1, ..., N-1 - uwire [0:N-1] tws = (twa[N-1:0]+1) & ~twa[N-1:0]; // Write Select per stage by address suffix - for(genvar stage = 0; stage < N; stage++) begin : genStages + //----------------------------------------------------------------------- + // Pipeline Feed + // - configuration always takes precedence + // - number of pending thresholding ops capped to N+3 + // across pipeline and output FIFO: pipe:N + A:1 + B:1 + 1 + pipe_t pipe[PE][N+1]; + if(1) begin : blkFeed + + // Thresholding Input Guard ensuring Output FIFO is never overrun + logic signed [$clog2(N+3):0] GuardSem = N+2; // N+2, N+1, ..., 0, -1 + uwire th_full = GuardSem[$left(GuardSem)]; + always_ff @(posedge clk) begin + if(rst) GuardSem <= N+2; + else begin + automatic logic dec = !cfg_en && !th_full && ivld; + automatic logic inc = ovld && ordy; + GuardSem <= GuardSem + (inc == dec? 0 : inc? 
1 : -1); + end + end - // Threshold Memory - uwire [K-1:0] thresh; - if(1) begin : blkUpdate + // PE Configuration Address Decoding + uwire cfg_sel[PE]; + if(PE == 1) assign cfg_sel[0] = 1; + else begin + for(genvar pe = 0; pe < PE; pe++) begin + assign cfg_sel[pe] = cfg_en && (cfg_a[N+:$clog2(PE)] == pe); + end + end - // Write control: local select from global address - uwire we = twe && tws[stage]; - if((C == 1) && (stage == 0)) begin - logic [K-1:0] Thresh = 'x; - always_ff @(posedge clk) begin - if(we) Thresh <= twd; + + uwire ptr_t iptr; + assign iptr[0+:N] = cfg_a[0+:N]; + if(CF > 1) begin + // Channel Fold Rotation + logic [$clog2(CF)-1:0] CnlCnt = 0; + logic CnlLst = 0; + always_ff @(posedge clk) begin + if(rst) begin + CnlCnt <= 0; + CnlLst <= 0; + end + else if(!cfg_en && !th_full && ivld) begin + CnlCnt <= CnlCnt + (CnlLst? 1-CF : 1); + CnlLst <= CnlCnt == CF-2; end - assign thresh = Thresh; end - else begin - logic [K-1:0] Threshs[C * 2**stage]; - uwire [$clog2(C)+stage-1:0] wa = twa[$left(twa):N-stage]; - uwire [$clog2(C)+stage-1:0] ra; - if(C > 1) assign ra[stage+:C_BITS] = pipe[stage].cnl; - if(stage) assign ra[stage-1:0] = pipe[stage].res[0:stage-1]; - - // Write - always_ff @(posedge clk) begin - if(we) Threshs[wa] <= twd; + + assign iptr[N+:$clog2(CF)] = cfg_en? cfg_a[N+$clog2(PE)+:$clog2(CF)] : CnlCnt; + end + + for(genvar pe = 0; pe < PE; pe++) begin + assign pipe[pe][0] = '{ + op: cfg_en? + (!cfg_sel[pe]? NOP : cfg_we? WR : RB) : + (ivld && !th_full? TH : NOP), + ptr: iptr, + val: !cfg_en? idat[pe] : cfg_we? 
cfg_d : 0 + }; + end + + assign irdy = !cfg_en && !th_full; + end : blkFeed + + //----------------------------------------------------------------------- + // Free-Running Thresholding Pipeline + for(genvar stage = 0; stage < N; stage++) begin : genStages + + localparam int unsigned SN = N-1-stage; + for(genvar pe = 0; pe < PE; pe++) begin : genPE + uwire pipe_t p = pipe[pe][stage]; + uwire cs = (p.ptr[SN:0] == 2**SN-1); + + // Threshold Memory + logic [K-1:0] Thresh = 'x; // Read-out register + if(1) begin : blkThreshMem + uwire we = (p.op ==? WR) && cs; + if((CF == 1) && (stage == 0)) begin + always_ff @(posedge clk) begin + if(we) Thresh <= p.val; + end + end + else begin + logic [K-1:0] Threshs[CF * 2**stage]; + uwire [$clog2(CF)+stage-1:0] addr = p.ptr[$clog2(CF)+N-1:SN+1]; + always_ff @(posedge clk) begin + if(we) Threshs[addr] <= p.val; + Thresh <= Threshs[addr]; + end end + end : blkThreshMem - // Read - logic [K-1:0] RdReg; - always_ff @(posedge clk) begin - if(en) RdReg <= Threshs[ra]; + // Pipeline State + pipe_t P = '{ op: NOP, default: 'x }; + logic Reval = 0; + always_ff @(posedge clk) begin + if(rst) begin + P <= '{ op: NOP, default: 'x }; + Reval <= 0; + end + else begin + P <= p; + Reval <= (p.op ==? RB) && cs; end - assign thresh = RdReg; end - end : blkUpdate + logic cmp; + if(!SIGNED) assign cmp = $unsigned(Thresh) <= $unsigned(P.val); + else if(!FPARG) assign cmp = $signed(Thresh) <= $signed(P.val); + else begin : blkSignedFloat + uwire mag_eq = Thresh[K-2:0] == P.val[K-2:0]; + uwire mag_le = Thresh[K-2:0] <= P.val[K-2:0]; + always_comb begin + unique case({Thresh[K-1], P.val[K-1]}) + 2'b00: cmp = mag_le; + 2'b01: cmp = 0; + 2'b10: cmp = 1; + 2'b11: cmp = !mag_le || mag_eq; + default: cmp = 'x; + endcase + end + end : blkSignedFloat + always_comb begin + automatic pipe_t pp = P; + if(P.op !=? 
CFG) pp.ptr[SN] = cmp; + if(Reval) pp.val = Thresh; + pipe[pe][stage+1] = pp; + end - // Pipeline regs simply copying the input - pipe_t State = '{ vld: 0, cnl: 'x, val: 'x, res: 'x }; - always_ff @(posedge clk) begin - if(rst) State <= '{ vld: 0, cnl: 'x, val: 'x, res: 'x }; - else if(en) State <= pipe[stage]; - end + end : genPE + end : genStages - // Assemble pipeline data - logic [0:N-1] res; - uwire cmp = - SIGNED? $signed(thresh) <= $signed(State.val) : - /* else */ $unsigned(thresh) <= $unsigned(State.val); - always_comb begin - res = State.res; - res[stage] = cmp; // Patch in next result bit + //----------------------------------------------------------------------- + // Configuration Readback + always_comb begin + cfg_rack = 0; + cfg_q = 0; + foreach(pipe[pe]) begin + automatic pipe_t p = pipe[pe][N]; + cfg_rack |= p.op ==? RB; + cfg_q |= p.val; end - assign pipe[stage+1] = '{ - vld: State.vld, - cnl: State.cnl, - val: State.val, - res: res - }; + end - end : genStages + //----------------------------------------------------------------------- + // Stream Output through FIFO + // - Depth of N + Output Reg to allow pipe to drain entirely under backpressure + // - Typically mapped to an SRL shift register + if(1) begin : blkStreamOutput + localparam int unsigned A_DEPTH = N+2; + logic [PE-1 : 0][N-1 : 0] ADat[A_DEPTH]; + logic signed [$clog2(A_DEPTH):0] APtr = '1; // -1, 0, 1, ..., A_DEPTH-1 + uwire avld = !APtr[$left(APtr)]; - // Output - assign ovld = pipe[N].vld; - assign ocnl = pipe[N].cnl; - assign odat = pipe[N].res + BIAS; + logic [PE-1:0][N-1:0] BDat = 'x; + logic BVld = 0; + + uwire aload = pipe[0][N].op ==? 
TH; + uwire bload = !BVld || ordy; + + always_ff @(posedge clk) begin + if(aload) begin + assert(APtr < $signed(A_DEPTH-1)) else begin + $error("Overrun after failing stream guard."); + $stop; + end + foreach(pipe[pe]) ADat[0][pe] <= pipe[pe][N].ptr; + for(int unsigned i = 1; i < A_DEPTH; i++) ADat[i] <= ADat[i-1]; + end + end + always_ff @(posedge clk) begin + if(rst) APtr <= '1; + else APtr <= APtr + (aload == (avld && bload)? 0 : aload? 1 : -1); + end + always_ff @(posedge clk) begin + if(rst) begin + BDat <= 'x; + BVld <= 0; + end + else if(bload) begin + BDat <= ADat[APtr]; + BVld <= avld; + end + end + + assign ovld = BVld; + for(genvar pe = 0; pe < PE; pe++) begin + assign odat[pe] = BDat[pe] + BIAS; + end + end : blkStreamOutput endmodule : thresholding diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 2f0393a3e7..98bbe20691 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -39,15 +39,16 @@ *****************************************************************************/ module thresholding_axi #( - int unsigned N, // output precision - int unsigned K, // input/threshold precision - int unsigned C, // Channels - int unsigned PE, // Processing Parallelism, requires C = k*PE + int unsigned N = 4, // output precision + int unsigned K = 16, // input/threshold precision + int unsigned C = 1, // Channels + int unsigned PE = 1, // Processing Parallelism, requires C = k*PE bit SIGNED = 1, // signed inputs + bit FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa int BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] - localparam int unsigned CF = 1 + (C-1)/PE, // Channel Fold + localparam int unsigned CF = C/PE, // Channel Fold localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, localparam int unsigned O_BITS = BIAS >= 0? 
/* unsigned */ $clog2(2**N+BIAS) : @@ -73,9 +74,9 @@ module thresholding_axi #( output logic [1:0] s_axilite_BRESP, // Reading - input logic s_axilite_ARVALID, - output logic s_axilite_ARREADY, - input logic [0:0] s_axilite_ARADDR, + input logic s_axilite_ARVALID, + output logic s_axilite_ARREADY, + input logic [ADDR_BITS-1:0] s_axilite_ARADDR, output logic s_axilite_RVALID, input logic s_axilite_RREADY, @@ -92,154 +93,39 @@ module thresholding_axi #( output logic m_axis_tvalid, output logic [((PE*O_BITS+7)/8)*8-1:0] m_axis_tdata ); - //- Parameter Constraints Checking -------------------------------------- - initial begin - if(C%PE != 0) begin - $error("%m: Channel count C=%0d is not a multiple of PE=%0d.", C, PE); - $finish; - end - end - - //- Global Control ------------------------------------------------------ - uwire clk = ap_clk; - uwire rst = !ap_rst_n; - - //- AXI Lite: Threshold Configuration ----------------------------------- - uwire twe[PE]; - uwire [$clog2(CF)+N-1:0] twa; - uwire [ K-1:0] twd; - if(1) begin : blkAxiLite - logic WABusy = 0; - logic WDBusy = 0; - logic Sel[PE] = '{ default: 'x }; - logic [$clog2(CF)+N-1:0] Addr = 'x; - logic [ K-1:0] Data = 'x; - - for(genvar pe = 0; pe < PE; pe++) begin - assign twe[pe] = WABusy && WDBusy && Sel[pe]; - end - assign twa = Addr; - assign twd = Data; - - if(PE == 1) always_comb Sel[0] = 1; - else begin - always_ff @(posedge clk) begin - if(!WABusy) begin - foreach(Sel[pe]) Sel[pe] <= s_axilite_AWADDR[N+2+:$clog2(PE)] == pe; - end - end - end - - uwire clr_wr = rst || (WABusy && WDBusy && s_axilite_BREADY); - always_ff @(posedge clk) begin - if(clr_wr) begin - WABusy <= 0; - Addr <= 'x; - WDBusy <= 0; - Data <= 'x; - end - else begin - if(!WABusy) begin - WABusy <= s_axilite_AWVALID; - Addr[0+:N] <= s_axilite_AWADDR[2+:N]; - if(CF > 1) Addr[N+:$clog2(CF)] <= s_axilite_AWADDR[2+N+$clog2(PE)+:$clog2(CF)]; - end - if(!WDBusy) begin - WDBusy <= s_axilite_WVALID; - Data <= s_axilite_WDATA[K-1:0]; - end - end - 
end - assign s_axilite_AWREADY = !WABusy; - assign s_axilite_WREADY = !WDBusy; - assign s_axilite_BVALID = WABusy && WDBusy; - assign s_axilite_BRESP = '0; // OK - - // Answer all reads with '1 - logic RValid = 0; - uwire clr_rd = rst || (RValid && s_axilite_RREADY); - always_ff @(posedge clk) begin - if(clr_rd) RValid <= 0; - else if(!RValid) RValid <= s_axilite_ARVALID; - end - assign s_axilite_ARREADY = !RValid; - assign s_axilite_RVALID = RValid; - assign s_axilite_RDATA = '1; - assign s_axilite_RRESP = '0; // OK - - end : blkAxiLite - - //- IO-Sandwich with two-stage output buffer for containing a local enable - uwire en; - uwire [PE-1:0][O_BITS-1:0] odat; - uwire ovld[PE]; - if(1) begin : blkOutputDecouple - typedef struct { - logic vld; - logic [PE-1:0][O_BITS-1:0] dat; - } buf_t; - buf_t A = '{ vld: 0, dat: 'x }; - buf_t B = '{ vld: 0, dat: 'x }; - always_ff @(posedge clk) begin - if(rst) begin - A <= '{ vld: 0, dat: 'x }; - B <= '{ vld: 0, dat: 'x }; - end - else begin - if(!B.vld || m_axis_tready) begin - B <= '{ - vld: A.vld || ovld[0], - dat: A.vld? A.dat : odat - }; - end - A.vld <= B.vld && !m_axis_tready && (A.vld || ovld[0]); - if(!A.vld) A.dat <= odat; - end - end - assign en = !A.vld; - - assign m_axis_tvalid = B.vld; - assign m_axis_tdata = B.dat; - - end : blkOutputDecouple - - localparam int unsigned C_BITS = C/PE < 2? 
1 : $clog2(C/PE); - uwire ivld = s_axis_tvalid; - uwire [C_BITS-1:0] icnl; - uwire [K -1:0] idat[PE]; - for(genvar pe = 0; pe < PE; pe++) begin - assign idat[pe] = s_axis_tdata[pe*K+:K]; - end - - assign s_axis_tready = en; - if(C == PE) assign icnl = 'x; - else begin - logic [C_BITS-1:0] Chnl = 0; - logic Last = 0; - uwire inc = ivld && en; - uwire clr = rst || (Last && inc); - always_ff @(posedge clk) begin - if(clr) begin - Chnl <= 0; - Last <= 0; - end - else if(inc) begin - Chnl <= Chnl + 1; - Last <= (~Chnl & (C/PE-2)) == 0; - end - end - assign icnl = Chnl; - end - - // Core Thresholding Modules - for(genvar pe = 0; pe < PE; pe++) begin : genCores - thresholding #(.N(N), .K(K), .C(C/PE), .SIGNED(SIGNED), .BIAS(BIAS)) core ( - .clk, .rst, - .twe(twe[pe]), .twa, .twd, - .en, - .ivld, .icnl, .idat(idat[pe]), - .ovld(ovld[pe]), .ocnl(), .odat(odat[pe]) - ); - end : genCores + + //----------------------------------------------------------------------- + // AXI-lite Configuration Interface + uwire cfg_en; + uwire cfg_we; + uwire [ADDR_BITS-1:0] cfg_a; + uwire [K -1:0] cfg_d; + uwire cfg_rack; + uwire [K -1:0] cfg_q; + axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi ( + .aclk(ap_clk), .aresetn(ap_rst_n), + + .awready(s_axilite_AWREADY), .awvalid(s_axilite_AWVALID), .awaddr(s_axilite_AWADDR), .awprot('x), + .wready(s_axilite_WREADY), .wvalid(s_axilite_WVALID), .wdata(s_axilite_WDATA), .wstrb(s_axilite_WSTRB), + .bready(s_axilite_BREADY), .bvalid(s_axilite_BVALID), .bresp(s_axilite_BRESP), + + .arready(s_axilite_ARREADY), .arvalid(s_axilite_ARVALID), .araddr(s_axilite_ARADDR), .arprot('x), + .rready(s_axilite_RREADY), .rvalid(s_axilite_RVALID), .rresp(s_axilite_RRESP), .rdata(s_axilite_RDATA), + + .ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a), .ip_wdata(cfg_d), + .ip_rack(cfg_rack), .ip_rdata(cfg_q) + ); + + //----------------------------------------------------------------------- + // Kernel Implementation + thresholding #(.N(N), 
.K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS)) impl ( + .clk(ap_clk), .rst(!ap_rst_n), + + .cfg_en, .cfg_we, .cfg_a, .cfg_d, + .cfg_rack, .cfg_q, + + .irdy(s_axis_tready), .ivld(s_axis_tvalid), .idat(s_axis_tdata), + .ordy(m_axis_tready), .ovld(m_axis_tvalid), .odat(m_axis_tdata) + ); endmodule : thresholding_axi diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v deleted file mode 100644 index 2657b39d98..0000000000 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ /dev/null @@ -1,130 +0,0 @@ -/****************************************************************************** - * Copyright (C) 2022, Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * @brief IPI-compatible Verilog wrapper for thresholding_axi module. - * @author Thomas B. Preußer - *****************************************************************************/ - -module $MODULE_NAME_AXI_WRAPPER$ #( - parameter N = $N$, // output precision - parameter K = $M$, // input/threshold precision - parameter C = $C$, // Channels - parameter PE = $PE$, - parameter SIGNED = $SIGNED$, // signed inputs - parameter BIAS = $BIAS$, // offsetting the output [0, 2^N-1) -> [BIAS, 2^N-1 + BIAS) - - parameter O_BITS = BIAS > 0? - /* unsigned */ $clog2(2**N+BIAS) : - /* signed */ 1+$clog2(-BIAS >= 2**(N-1)? 
-BIAS : 2**N+BIAS) -)( - //- Global Control ------------------ - (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) - input ap_clk, - (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *) - input ap_rst_n, - - //- AXI Lite ------------------------ - // Writing - input s_axilite_AWVALID, - output s_axilite_AWREADY, - input [$clog2(C)+N+1:0] s_axilite_AWADDR, - - input s_axilite_WVALID, - output s_axilite_WREADY, - input [31:0] s_axilite_WDATA, - input [ 3:0] s_axilite_WSTRB, - - output s_axilite_BVALID, - input s_axilite_BREADY, - output [1:0] s_axilite_BRESP, - - // Reading - input s_axilite_ARVALID, - output s_axilite_ARREADY, - input [0:0] s_axilite_ARADDR, - - output s_axilite_RVALID, - input s_axilite_RREADY, - output [31:0] s_axilite_RDATA, - output [ 1:0] s_axilite_RRESP, - - //- AXI Stream - Input -------------- - output in0_V_TREADY, - input in0_V_TVALID, - input [((PE*K+7)/8)*8-1:0] in0_V_TDATA, - - //- AXI Stream - Output ------------- - input out_V_TREADY, - output out_V_TVALID, - output [((PE*O_BITS+7)/8)*8-1:0] out_V_TDATA -); - - thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .BIAS(BIAS)) inst ( - //- Global Control ------------------ - .ap_clk(ap_clk), - .ap_rst_n(ap_rst_n), - - //- AXI Lite ------------------------ - // Writing - .s_axilite_AWVALID(s_axilite_AWVALID), - .s_axilite_AWREADY(s_axilite_AWREADY), - .s_axilite_AWADDR(s_axilite_AWADDR), - - .s_axilite_WVALID(s_axilite_WVALID), - .s_axilite_WREADY(s_axilite_WREADY), - .s_axilite_WDATA(s_axilite_WDATA), - .s_axilite_WSTRB(s_axilite_WSTRB), - - .s_axilite_BVALID(s_axilite_BVALID), - .s_axilite_BREADY(s_axilite_BREADY), - .s_axilite_BRESP(s_axilite_BRESP), - - // Reading - .s_axilite_ARVALID(s_axilite_ARVALID), - .s_axilite_ARREADY(s_axilite_ARREADY), - .s_axilite_ARADDR(s_axilite_ARADDR), - - .s_axilite_RVALID(s_axilite_RVALID), - .s_axilite_RREADY(s_axilite_RREADY), - .s_axilite_RDATA(s_axilite_RDATA), - .s_axilite_RRESP(s_axilite_RRESP), - - //- AXI 
Stream - Input -------------- - .s_axis_tready(in0_V_TREADY), - .s_axis_tvalid(in0_V_TVALID), - .s_axis_tdata(in0_V_TDATA), - - //- AXI Stream - Output ------------- - .m_axis_tready(out_V_TREADY), - .m_axis_tvalid(out_V_TVALID), - .m_axis_tdata(out_V_TDATA) - ); - -endmodule : $MODULE_NAME_AXI_WRAPPER$ diff --git a/finn-rtllib/thresholding/sim/thresholding.tcl b/finn-rtllib/thresholding/sim/thresholding.tcl new file mode 100644 index 0000000000..82dc59deb1 --- /dev/null +++ b/finn-rtllib/thresholding/sim/thresholding.tcl @@ -0,0 +1,17 @@ +create_project -force thresholding thresholding.vivado -part xcvc1902-vsva2197-2MP-e-S +set_property board_part xilinx.com:vck190:part0:2.2 [current_project] + +read_verilog hdl/axilite_if.v +read_verilog -sv { hdl/thresholding.sv hdl/thresholding_axi.sv } + +set simset [current_fileset -simset] +set_property -name xsim.simulate.log_all_signals -value true -objects $simset +set_property -name xsim.simulate.runtime -value all -objects $simset +add_files -fileset $simset { sim/thresholding_tb.sv sim/thresholding_axi_tb.sv } + +foreach top { thresholding_tb thresholding_axi_tb } { + set_property top $top $simset + + launch_simulation + close_sim +} diff --git a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv new file mode 100644 index 0000000000..200d4d5999 --- /dev/null +++ b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv @@ -0,0 +1,314 @@ +/****************************************************************************** + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Testbench for thresholding_axi. 
+ * @author Monica Chiosa + * + */ + +module thresholding_axi_tb #( + int unsigned N = 4, // output precision + int unsigned C = 6, // number of channels + int unsigned PE = 2, + real M0 = 7.3, // slope of the uniform thresholding line + real B0 = 3.1, // offset of the uniform thresholding line + bit THROTTLED = 1, + + localparam int unsigned CF = C/PE, // Channel Fold + localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2 +); + + //----------------------------------------------------------------------- + // Design Geometry + + // For each channel = [0,channel): + // M_channel = M0 + CX*channel + // B_channel = B0 + CX*channel + // Input/threshold precision computed according to the maximum possible value + localparam real CX = 1.375; + localparam int unsigned K = $clog2((2**N-1)*(M0+C*CX) + (B0+C*CX)); // unused sign + magnitude + localparam int unsigned C_BITS = C < 2?
s_axilite_ARVALID; + uwire s_axilite_ARREADY; + logic [ADDR_BITS-1:0] s_axilite_ARADDR; + uwire s_axilite_RVALID; + uwire s_axilite_RREADY = 1; + uwire [ 31:0] s_axilite_RDATA; + uwire [ 1:0] s_axilite_RRESP; + + uwire irdy; + logic ivld; + logic [PE-1:0][K-1:0] idat; + + logic ordy = 0; + uwire ovld; + uwire [PE-1:0][N-1:0] odat; + + thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0)) dut ( + .ap_clk(clk), .ap_rst_n(!rst), + + // Configuration + .s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR, + .s_axilite_WVALID, .s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB('1), + .s_axilite_BVALID, .s_axilite_BREADY, .s_axilite_BRESP, + .s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR, + .s_axilite_RVALID, .s_axilite_RREADY, .s_axilite_RDATA, .s_axilite_RRESP, + + // Stream Processing + .s_axis_tready(irdy), .s_axis_tvalid(ivld), .s_axis_tdata(idat), + .m_axis_tready(ordy), .m_axis_tvalid(ovld), .m_axis_tdata(odat) + ); + + //----------------------------------------------------------------------- + // Input Stimuli + typedef logic [PE-1:0][K-1:0] input_t; + typedef logic [$clog2(CF)+$clog2(PE)+N-1:0] addr_t; + input_t QW[$]; // Input Feed Tracing + addr_t QC[$]; + + int unsigned error_cnt = 0; + bit done = 0; + initial begin + // Report testbench details + $display("Testbench - tresholding K=%0d -> N=%0d", K, N); + for(int unsigned c = 0; c < C; c++) begin + $write("Channel #%0d: Thresholds = {", c); + for(int unsigned i = 0; i < 2**N-1; i++) $write(" %0d", THRESHS[c][i]); + $display(" }"); + end + + // Config + s_axilite_AWVALID = 0; + s_axilite_AWADDR = 'x; + s_axilite_WVALID = 0; + s_axilite_WDATA = 'x; + s_axilite_BREADY = 0; + s_axilite_ARVALID = 0; + s_axilite_ARADDR = 'x; + + // Stream Input + ivld = 0; + idat = 'x; + + @(posedge clk iff !rst); + + // Threshold Configuratin + for(int unsigned c = 0; c < C; c+=PE) begin + automatic addr_t addr = 0; + if(CF > 1) addr[N+$clog2(PE)+:$clog2(CF)] = c/PE; + for(int unsigned pe = 0; pe < PE; 
pe++) begin + if(PE > 1) addr[N+:$clog2(PE)] = pe; + for(int unsigned t = 0; t < 2**N-1; t++) begin + addr[0+:N] = t; + fork + begin + s_axilite_AWVALID <= 1; + s_axilite_AWADDR <= { addr, 2'b00 }; + @(posedge clk iff s_axilite_AWREADY); + s_axilite_AWVALID <= 0; + s_axilite_AWADDR <= 'x; + end + begin + s_axilite_WVALID <= 1; + s_axilite_WDATA <= THRESHS[c+pe][t]; + @(posedge clk iff s_axilite_WREADY); + s_axilite_WVALID <= 0; + s_axilite_WDATA <= 'x; + end + begin + s_axilite_BREADY <= 1; + @(posedge clk iff s_axilite_BVALID); + assert(s_axilite_BRESP == '0) else begin + $error("Error on parameter write."); + $stop; + end + s_axilite_BREADY <= 0; + end + join + end + end + end + + fork + // Intermittent configuration readback + while(!done) begin + if(($urandom()%37) != 0) begin + s_axilite_ARVALID <= 0; + s_axilite_ARADDR <= 'x; + @(posedge clk); + end + else begin + automatic addr_t addr = $urandom()%(N-1); + if(PE > 1) addr[N+:$clog2(PE)] = $urandom()%PE; + if(CF > 1) addr[N+$clog2(PE)+:$clog2(CF)] = $urandom()%CF; + + s_axilite_ARVALID <= 1; + s_axilite_ARADDR <= { addr, 2'b00 }; + @(posedge clk iff s_axilite_ARREADY); + + QC.push_back(addr); + end + end + + // AXI4Stream MST Writes input values + repeat(MST_STRM_WROUNDS) begin + automatic input_t dat; + + while(THROTTLED && ($urandom()%7 == 0)) @(posedge clk); + + std::randomize(dat); + ivld <= 1; + idat <= dat; + @(posedge clk iff irdy); + ivld <= 0; + idat <= 'x; + QW.push_back(dat); + end + join_any + done <= 1; + repeat(N+6) @(posedge clk); + + assert(QW.size() == 0) else begin + $error("Missing %0d outputs.", QW.size()); + $stop; + end + assert(QC.size() == 0) else begin + $error("Missing %0d readback replies.", QC.size()); + $stop; + end + + $display("Test completed: %0d errors in %0d tests.", error_cnt, MST_STRM_WROUNDS); + $display("========================================="); + $finish; + end + + // Output Checker ------------------------------------------------------- + + // Configuration Readback 
+ always_ff @(posedge clk iff s_axilite_RVALID) begin + assert(s_axilite_RRESP == '0) else begin + $error("Read back error."); + $stop; + end + assert(QC.size()) begin + automatic addr_t addr = QC.pop_front(); + automatic int unsigned cnl = + (CF == 1? 0 : addr[N+$clog2(PE)+:$clog2(CF)] * PE) + + (PE == 1? 0 : addr[N+:$clog2(PE)]); + automatic logic [K-1:0] exp = THRESHS[cnl][addr[0+:N]]; + assert(s_axilite_RDATA == exp) else begin + $error("Readback mismatch on #%0d.%0d: %0d instead of %0d", cnl, addr[0+:N], s_axilite_RDATA, exp); + $stop; + end + end + else begin + $error("Spurious readback output."); + $stop; + end + end + + // Stream Output + int unsigned OCnl = 0; + always @(posedge clk) begin + if(rst) begin + OCnl <= 0; + ordy <= 1'b0; + end + else begin + if(!ordy || ovld) ordy <= ($urandom()%5 != 0) || !THROTTLED; + + if(ordy && ovld) begin + assert(QW.size()) begin + automatic input_t x = QW.pop_front(); + + for(int unsigned pe = 0; pe < PE; pe++) begin + automatic int unsigned cnl = OCnl + pe; + + $display("Mapped CNL=%0d DAT=%3d -> #%2d", cnl, x[pe], odat[pe]); + assert( + ((odat[pe] == 0) || (THRESHS[cnl][odat[pe]-1] <= x[pe])) && + ((odat[pe] == 2**N-1) || (x[pe] < THRESHS[cnl][odat[pe]])) + ) else begin + $error("Output error on presumed input CNL=%0d DAT=0x%0x -> #%0d", cnl, x[pe], odat[pe]); + error_cnt++; + $stop; + end + end + end + else begin + $error("Spurious output."); + $stop; + end + + OCnl <= (OCnl + PE)%C; + end + end + end + +endmodule: thresholding_axi_tb diff --git a/finn-rtllib/thresholding/sim/thresholding_tb.sv b/finn-rtllib/thresholding/sim/thresholding_tb.sv new file mode 100644 index 0000000000..90dfba1022 --- /dev/null +++ b/finn-rtllib/thresholding/sim/thresholding_tb.sv @@ -0,0 +1,272 @@ +/****************************************************************************** + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Testbench for thresholding.
+ * @author Monica Chiosa + * + */ + +module thresholding_tb #( + int unsigned K = 10, // input precision + int unsigned N = 4, // output precision + int unsigned C = 6, // number of channels + int unsigned PE = 2, + + localparam int unsigned CF = C/PE // Channel Fold +); + localparam int unsigned MST_STRM_WROUNDS = 507; + localparam bit THROTTLED = 1; + + //----------------------------------------------------------------------- + // Clock and Reset Control + logic clk = 0; + always #5ns clk = !clk; + logic rst = 1; + initial begin + #10ns; + @(posedge clk); + rst <= 0; + end + + //----------------------------------------------------------------------- + // Parallel Instances differing in Data Type + typedef logic [K -1:0] val_t; + typedef val_t threshs_t[C][2**N-1]; + typedef val_t [PE-1:0] input_t; + typedef logic [$clog2(CF)+$clog2(PE)+N-1:0] addr_t; + logic [0:2] term = '0; + always_comb begin + if(&term) $finish; + end + for(genvar i = 0; i < 3; i++) begin : genTypes + localparam bit SIGNED = i>0; + localparam bit FPARG = i>1; + + //- DUT ------------------------- + logic cfg_en; + logic cfg_we; + logic [$clog2(C)+N-1:0] cfg_a; + logic [K-1:0] cfg_d; + uwire cfg_rack; + uwire [K-1:0] cfg_q; + + uwire irdy; + logic ivld; + logic [PE-1:0][K-1:0] idat; + + logic ordy = 0; + uwire ovld; + uwire [PE-1:0][N-1:0] odat; + + thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG)) dut ( + .clk, .rst, + + // Configuration + .cfg_en, .cfg_we, .cfg_a, .cfg_d, + .cfg_rack, .cfg_q, + + // Stream Processing + .irdy, .ivld, .idat, + .ordy, .ovld, .odat + ); + + //- Stimulus Driver ------------- + threshs_t THRESHS; + function val_t sigord(input val_t x); + automatic val_t res = x; + if(SIGNED) begin + if(FPARG && x[K-1]) res[K-2:0] = ~x[K-2:0]; + res[K-1] = !x[K-1]; + end + return res; + endfunction : sigord + + input_t QW[$]; // Input tracing + addr_t QC[$]; // Readback tracking + int unsigned error_cnt = 0; + bit done = 0; + initial begin + + // Generate 
thresholds + std::randomize(THRESHS); + foreach(THRESHS[c]) begin + val_t row[2**N-1] = THRESHS[c]; + row.sort with (sigord(item)); + THRESHS[c] = row; + end + + // Report test case details + $display("[%0d] Thresholding %s%s%0d -> uint%0d", i, SIGNED? "s" : "u", FPARG? "fp" : "int", K, N); + for(int unsigned c = 0; c < C; c++) begin + $write("[%0d] Channel #%0d: Thresholds = {", i, c); + for(int unsigned i = 0; i < 2**N-1; i++) $write(" %0X", THRESHS[c][i]); + $display(" }"); + end + + // Config + cfg_en = 0; + cfg_we = 'x; + cfg_a = 'x; + cfg_d = 'x; + + // Stream Input + ivld = 0; + idat = 'x; + + @(posedge clk iff !rst); + + // Threshold Configuratin + cfg_en <= 1; + cfg_we <= 1; + for(int unsigned c = 0; c < C; c+=PE) begin + if(CF > 1) cfg_a[N+$clog2(PE)+:$clog2(CF)] <= c/PE; + for(int unsigned pe = 0; pe < PE; pe++) begin + if(PE > 1) cfg_a[N+:$clog2(PE)] = pe; + for(int unsigned t = 0; t < 2**N-1; t++) begin + cfg_a[0+:N] <= t; + cfg_d <= THRESHS[c+pe][t]; + @(posedge clk); + end + end + end + cfg_d <= 'x; + + fork + // Intermittent configuration readback + while(!done) begin + cfg_en <= 0; + cfg_we <= 'x; + cfg_a <= 'x; + @(posedge clk); + if(($urandom()%37) == 0) begin + automatic addr_t addr = $urandom()%(N-1); + if(PE > 1) addr[N+:$clog2(PE)] = $urandom()%PE; + if(CF > 1) addr[N+$clog2(PE)+:$clog2(CF)] = $urandom()%CF; + + cfg_en <= 1; + cfg_we <= 0; + cfg_a <= addr; + @(posedge clk); + QC.push_back(addr); + end + end + + // AXI4Stream MST Writes input values + repeat(MST_STRM_WROUNDS) begin + automatic input_t dat; + + while(THROTTLED && ($urandom()%7 == 0)) @(posedge clk); + + std::randomize(dat); + ivld <= 1; + idat <= dat; + @(posedge clk iff irdy); + ivld <= 0; + idat <= 'x; + QW.push_back(dat); + end + join_any + done <= 1; + repeat(N+6) @(posedge clk); + + assert(QW.size() == 0) else begin + $error("[%0d] Missing %0d outputs.", i, QW.size()); + $stop; + end + assert(QC.size() == 0) else begin + $error("[%0d] Missing %0d readback replies.", i, 
QC.size()); + $stop; + end + + $display("[%0d] Test completed: %0d errors in %0d tests.", i, error_cnt, MST_STRM_WROUNDS); + $display("============================================="); + term[i] <= 1; + end + + //- Readback Checker -------------- + always_ff @(posedge clk iff cfg_rack) begin + assert(QC.size()) begin + automatic addr_t addr = QC.pop_front(); + automatic int unsigned cnl = + (CF == 1? 0 : addr[N+$clog2(PE)+:$clog2(CF)] * PE) + + (PE == 1? 0 : addr[N+:$clog2(PE)]); + automatic logic [K-1:0] exp = THRESHS[cnl][addr[0+:N]]; + assert(cfg_q == exp) else begin + $error("[%0d] Readback mismatch on #%0d.%0d: %0d instead of %0d", i, cnl, addr[0+:N], cfg_q, exp); + $stop; + end + end + else begin + $error("[%0d] Spurious readback output.", i); + $stop; + end + end + + // Output Checker + int unsigned OCnl = 0; + always @(posedge clk) begin + if(rst) begin + OCnl <= 0; + ordy <= 1'b0; + end + else begin + if(!ordy || ovld) ordy <= ($urandom()%5 != 0) || !THROTTLED; + + if(ordy && ovld) begin + assert(QW.size()) begin + automatic input_t x = QW.pop_front(); + + for(int unsigned pe = 0; pe < PE; pe++) begin + automatic int unsigned cnl = OCnl + pe; + + $display("[%0d] Mapped CNL=%0d DAT=%3x -> #%2d", i, cnl, x[pe], odat[pe]); + assert( + ((odat[pe] == 0) || (sigord(THRESHS[cnl][odat[pe]-1]) <= sigord(x[pe]))) && + ((odat[pe] == 2**N-1) || (sigord(x[pe]) < sigord(THRESHS[cnl][odat[pe]]))) + ) else begin + $error("[%0d] Output error on presumed input CNL=%0d DAT=0x%0x -> #%0d", i, cnl, x[pe], odat[pe]); + error_cnt++; + $stop; + end + end + end + else begin + $error("[%0d] Spurious output.", i); + $stop; + end + + OCnl <= (OCnl + PE)%C; + end + end + end + + end : genTypes + +endmodule: thresholding_tb From 28e5ad7d81d32f0bf26aac773aa50db40a289c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 27 Jun 2023 06:50:20 +0100 Subject: [PATCH 097/111] Packaged IP for RTL thresholding implementation. 
--- finn-rtllib/thresholding/component.xml | 1002 +++++++++++++++++ .../gui/thresholding_axi_v1_0.gtcl | 4 + .../thresholding/hdl/thresholding_axi.sv | 4 +- .../hdl/thresholding_axi_wrapper.v | 110 ++ .../xgui/thresholding_axi_v1_0.tcl | 187 +++ 5 files changed, 1305 insertions(+), 2 deletions(-) create mode 100644 finn-rtllib/thresholding/component.xml create mode 100644 finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl create mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v create mode 100644 finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl diff --git a/finn-rtllib/thresholding/component.xml b/finn-rtllib/thresholding/component.xml new file mode 100644 index 0000000000..e28a3a2c2d --- /dev/null +++ b/finn-rtllib/thresholding/component.xml @@ -0,0 +1,1002 @@ + + + amd.com + finn + thresholding_axi + 1.0 + + + ap_clk + + + + + + + CLK + + + ap_clk + + + + + + ASSOCIATED_RESET + ap_rst_n + + + ASSOCIATED_BUSIF + s_axilite:s_axis:m_axis + + + FREQ_TOLERANCE_HZ + -1 + + + + + m_axis + + + + + + + TDATA + + + m_axis_tdata + + + + + TVALID + + + m_axis_tvalid + + + + + TREADY + + + m_axis_tready + + + + + + s_axis + + + + + + + TDATA + + + s_axis_tdata + + + + + TVALID + + + s_axis_tvalid + + + + + TREADY + + + s_axis_tready + + + + + + s_axilite + + + + + + + + + AWADDR + + + s_axilite_AWADDR + + + + + AWVALID + + + s_axilite_AWVALID + + + + + AWREADY + + + s_axilite_AWREADY + + + + + WDATA + + + s_axilite_WDATA + + + + + WSTRB + + + s_axilite_WSTRB + + + + + WVALID + + + s_axilite_WVALID + + + + + WREADY + + + s_axilite_WREADY + + + + + BRESP + + + s_axilite_BRESP + + + + + BVALID + + + s_axilite_BVALID + + + + + BREADY + + + s_axilite_BREADY + + + + + ARADDR + + + s_axilite_ARADDR + + + + + ARVALID + + + s_axilite_ARVALID + + + + + ARREADY + + + s_axilite_ARREADY + + + + + RDATA + + + s_axilite_RDATA + + + + + RRESP + + + s_axilite_RRESP + + + + + RVALID + + + s_axilite_RVALID + + + + + RREADY + + + s_axilite_RREADY + + + + + + 
ap_rst_n + + + + + + + RST + + + ap_rst_n + + + + + + POLARITY + ACTIVE_LOW + + + + + + + s_axilite + s_axilite + + reg0 + reg0 + 0x0 + 4096 + 32 + register + + + + + + + xilinx_anylanguagesynthesis + Synthesis + :vivado.xilinx.com:synthesis + Verilog + thresholding_axi_wrapper + + xilinx_anylanguagesynthesis_view_fileset + + + + viewChecksum + fd0bd85b + + + + + xilinx_anylanguagebehavioralsimulation + Simulation + :vivado.xilinx.com:simulation + Verilog + thresholding_axi_wrapper + + xilinx_anylanguagebehavioralsimulation_view_fileset + + + + viewChecksum + fd0bd85b + + + + + xilinx_xpgui + UI Layout + :vivado.xilinx.com:xgui.ui + + xilinx_xpgui_view_fileset + + + + viewChecksum + fc6b9b63 + + + + + xilinx_utilityxitfiles + Utility XIT/TTCL + :vivado.xilinx.com:xit.util + + xilinx_utilityxitfiles_view_fileset + + + + viewChecksum + 8b0215cd + + + + + + + ap_clk + + in + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + ap_rst_n + + in + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axilite_AWVALID + + in + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 0 + + + + + s_axilite_AWREADY + + out + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axilite_AWADDR + + in + + 5 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 0 + + + + + s_axilite_WVALID + + in + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 0 + + + + + s_axilite_WREADY + + out + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axilite_WDATA + + in + + 31 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 0 + + + + + s_axilite_WSTRB + + in + + 3 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + 
xilinx_anylanguagebehavioralsimulation + + + + 1 + + + + + s_axilite_BVALID + + out + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axilite_BREADY + + in + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 0 + + + + + s_axilite_BRESP + + out + + 1 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axilite_ARVALID + + in + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 0 + + + + + s_axilite_ARREADY + + out + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axilite_ARADDR + + in + + 5 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 0 + + + + + s_axilite_RVALID + + out + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axilite_RREADY + + in + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 0 + + + + + s_axilite_RDATA + + out + + 31 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axilite_RRESP + + out + + 1 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axis_tready + + out + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axis_tvalid + + in + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + s_axis_tdata + + in + + 15 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 0 + + + + + m_axis_tready + + in + + + std_logic + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + 1 + + + + + m_axis_tvalid + + out + + + std_logic + xilinx_anylanguagesynthesis + 
xilinx_anylanguagebehavioralsimulation + + + + + + m_axis_tdata + + out + + 7 + 0 + + + + std_logic_vector + xilinx_anylanguagesynthesis + xilinx_anylanguagebehavioralsimulation + + + + + + + + N + N + 4 + + + K + K + 16 + + + C + C + 1 + + + PE + Pe + 1 + + + SIGNED + Signed + true + + + FPARG + Fparg + false + + + BIAS + Bias + 0 + + + CF + Cf + 1 + + + ADDR_BITS + Addr Bits + 6 + + + O_BITS + O Bits + 4 + + + + + + choice_list_9d8b0d81 + ACTIVE_HIGH + ACTIVE_LOW + + + + + xilinx_anylanguagesynthesis_view_fileset + + hdl/thresholding.sv + systemVerilogSource + + + hdl/thresholding_axi.sv + systemVerilogSource + + + hdl/thresholding_axi_wrapper.v + verilogSource + CHECKSUM_7b8c102d + + + hdl/axilite_if.v + verilogSource + CHECKSUM_69d1ba26 + xil_defaultlib + + + + xilinx_anylanguagebehavioralsimulation_view_fileset + + hdl/thresholding.sv + systemVerilogSource + + + hdl/thresholding_axi.sv + systemVerilogSource + + + hdl/thresholding_axi_wrapper.v + verilogSource + + + hdl/axilite_if.v + verilogSource + USED_IN_ipstatic + xil_defaultlib + + + + xilinx_xpgui_view_fileset + + xgui/thresholding_axi_v1_0.tcl + tclSource + CHECKSUM_fc6b9b63 + XGUI_VERSION_2 + + + + xilinx_utilityxitfiles_view_fileset + + gui/thresholding_axi_v1_0.gtcl + GTCL + + + + MultiThreshold + + + N + Output Precision + 4 + + + K + Input Precision + 16 + + + C + Channels + 1 + + + PE + Pe + 1 + + + SIGNED + Signed Inputs + true + + + FPARG + Floating-Point Inputs + false + + + BIAS + Bias + 0 + + + CF + Channel Fold + 1 + + + + false + + + + + + ADDR_BITS + Address Bits + 6 + + + + false + + + + + + O_BITS + Output Value Width + 4 + + + + false + + + + + + Component_Name + thresholding_axi_wrapper_v1_0 + + + + + + virtex7 + qvirtex7 + versal + kintex7 + kintex7l + qkintex7 + qkintex7l + akintex7 + artix7 + artix7l + aartix7 + qartix7 + zynq + qzynq + azynq + spartan7 + aspartan7 + virtexu + zynquplus + virtexuplus + virtexuplusHBM + virtexuplus58g + kintexuplus + artixuplus + kintexu + + + 
/UserIP + + thresholding_axi + level_1 + package_project + 2 + + user.org:user:thresholding_axi_wrapper:1.0 + + 2023-06-27T05:47:20Z + + + + + + 2022.2 + + + + + + + + + + + + + + diff --git a/finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl b/finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl new file mode 100644 index 0000000000..90d73ede7e --- /dev/null +++ b/finn-rtllib/thresholding/gui/thresholding_axi_v1_0.gtcl @@ -0,0 +1,4 @@ +# This file is automatically written. Do not modify. +proc gen_USERPARAMETER_CF_VALUE {C PE } {expr $C/$PE} +proc gen_USERPARAMETER_ADDR_BITS_VALUE {C PE N } {expr int(ceil(log($C/$PE)/log(2))+ceil(log($PE)/log(2))+$N+2)} +proc gen_USERPARAMETER_O_BITS_VALUE {BIAS N } {expr int(ceil($BIAS >= 0? log(pow(2,$N)+$BIAS)/log(2) : 1+log(-$BIAS >= pow(2,$N-1)? -$BIAS : pow(2,$N)+$BIAS)/log(2)))} diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 98bbe20691..53066901fb 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -39,8 +39,8 @@ *****************************************************************************/ module thresholding_axi #( - int unsigned N = 4, // output precision - int unsigned K = 16, // input/threshold precision + int unsigned N, // output precision + int unsigned K, // input/threshold precision int unsigned C = 1, // Channels int unsigned PE = 1, // Processing Parallelism, requires C = k*PE diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v new file mode 100644 index 0000000000..14c2c13bfd --- /dev/null +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v @@ -0,0 +1,110 @@ +/** + * Copyright (c) 2023, Xilinx + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of FINN nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @author Thomas B. Preußer + * @brief Verilog wrapper for IP packaging. 
+ */ + +module thresholding_axi_wrapper #( + parameter N = 4, // output precision + parameter K = 16, // input/threshold precision + parameter C = 1, // Channels + parameter PE = 1, // Processing Parallelism, requires C = k*PE + + parameter SIGNED = 1, // signed inputs + parameter FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa + parameter BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] + + parameter CF = C/PE, // Channel Fold + parameter ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, + parameter O_BITS = $clog2(2**N+BIAS) +)( + // Global Control + (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF s_axilite:s_axis:m_axis, ASSOCIATED_RESET ap_rst_n" *) + (* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *) + input ap_clk, + (* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *) + input ap_rst_n, + + //- AXI Lite ------------------------ + // Writing + input s_axilite_AWVALID, + output s_axilite_AWREADY, + input [ADDR_BITS-1:0] s_axilite_AWADDR, // lowest 2 bits (byte selectors) are ignored + + input s_axilite_WVALID, + output s_axilite_WREADY, + input [31:0] s_axilite_WDATA, + input [ 3:0] s_axilite_WSTRB, + + output s_axilite_BVALID, + input s_axilite_BREADY, + output [1:0] s_axilite_BRESP, + + // Reading + input s_axilite_ARVALID, + output s_axilite_ARREADY, + input [ADDR_BITS-1:0] s_axilite_ARADDR, + + output s_axilite_RVALID, + input s_axilite_RREADY, + output [31:0] s_axilite_RDATA, + output [ 1:0] s_axilite_RRESP, + + //- AXI Stream - Input -------------- + output s_axis_tready, + input s_axis_tvalid, + input [((PE*K+7)/8)*8-1:0] s_axis_tdata, + + //- AXI Stream - Output ------------- + input m_axis_tready, + output m_axis_tvalid, + output [((PE*O_BITS+7)/8)*8-1:0] m_axis_tdata +); + + thresholding_axi #( + .N(N), .K(K), .C(C), .PE(PE), + .SIGNED(SIGNED), + .FPARG(FPARG), + .BIAS(BIAS) + ) core ( + .ap_clk, .ap_rst_n, + + .s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR, + .s_axilite_WVALID, 
.s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB, + .s_axilite_BVALID, .s_axilite_BREADY, .s_axilite_BRESP, + + .s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR, + .s_axilite_RVALID, .s_axilite_RREADY, .s_axilite_RDATA, .s_axilite_RRESP, + + .s_axis_tready, .s_axis_tvalid, .s_axis_tdata, + .m_axis_tready, .m_axis_tvalid, .m_axis_tdata + ); + +endmodule : thresholding_axi_wrapper diff --git a/finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl b/finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl new file mode 100644 index 0000000000..338304fa40 --- /dev/null +++ b/finn-rtllib/thresholding/xgui/thresholding_axi_v1_0.tcl @@ -0,0 +1,187 @@ + +# Loading additional proc with user specified bodies to compute parameter values. +source [file join [file dirname [file dirname [info script]]] gui/thresholding_axi_v1_0.gtcl] + +# Definitional proc to organize widgets for parameters. +proc init_gui { IPINST } { + ipgui::add_param $IPINST -name "Component_Name" + #Adding Page + set Page_0 [ipgui::add_page $IPINST -name "Page 0"] + ipgui::add_param $IPINST -name "ADDR_BITS" -parent ${Page_0} + ipgui::add_param $IPINST -name "BIAS" -parent ${Page_0} + ipgui::add_param $IPINST -name "C" -parent ${Page_0} + ipgui::add_param $IPINST -name "CF" -parent ${Page_0} + ipgui::add_param $IPINST -name "FPARG" -parent ${Page_0} + ipgui::add_param $IPINST -name "K" -parent ${Page_0} + ipgui::add_param $IPINST -name "N" -parent ${Page_0} + ipgui::add_param $IPINST -name "O_BITS" -parent ${Page_0} + set PE [ipgui::add_param $IPINST -name "PE" -parent ${Page_0}] + set_property tooltip {PE Count} ${PE} + ipgui::add_param $IPINST -name "SIGNED" -parent ${Page_0} + + +} + +proc update_PARAM_VALUE.ADDR_BITS { PARAM_VALUE.ADDR_BITS PARAM_VALUE.C PARAM_VALUE.PE PARAM_VALUE.N } { + # Procedure called to update ADDR_BITS when any of the dependent parameters in the arguments change + + set ADDR_BITS ${PARAM_VALUE.ADDR_BITS} + set C ${PARAM_VALUE.C} + set PE ${PARAM_VALUE.PE} + set N 
${PARAM_VALUE.N} + set values(C) [get_property value $C] + set values(PE) [get_property value $PE] + set values(N) [get_property value $N] + set_property value [gen_USERPARAMETER_ADDR_BITS_VALUE $values(C) $values(PE) $values(N)] $ADDR_BITS +} + +proc validate_PARAM_VALUE.ADDR_BITS { PARAM_VALUE.ADDR_BITS } { + # Procedure called to validate ADDR_BITS + return true +} + +proc update_PARAM_VALUE.CF { PARAM_VALUE.CF PARAM_VALUE.C PARAM_VALUE.PE } { + # Procedure called to update CF when any of the dependent parameters in the arguments change + + set CF ${PARAM_VALUE.CF} + set C ${PARAM_VALUE.C} + set PE ${PARAM_VALUE.PE} + set values(C) [get_property value $C] + set values(PE) [get_property value $PE] + set_property value [gen_USERPARAMETER_CF_VALUE $values(C) $values(PE)] $CF +} + +proc validate_PARAM_VALUE.CF { PARAM_VALUE.CF } { + # Procedure called to validate CF + return true +} + +proc update_PARAM_VALUE.O_BITS { PARAM_VALUE.O_BITS PARAM_VALUE.BIAS PARAM_VALUE.N } { + # Procedure called to update O_BITS when any of the dependent parameters in the arguments change + + set O_BITS ${PARAM_VALUE.O_BITS} + set BIAS ${PARAM_VALUE.BIAS} + set N ${PARAM_VALUE.N} + set values(BIAS) [get_property value $BIAS] + set values(N) [get_property value $N] + set_property value [gen_USERPARAMETER_O_BITS_VALUE $values(BIAS) $values(N)] $O_BITS +} + +proc validate_PARAM_VALUE.O_BITS { PARAM_VALUE.O_BITS } { + # Procedure called to validate O_BITS + return true +} + +proc update_PARAM_VALUE.BIAS { PARAM_VALUE.BIAS } { + # Procedure called to update BIAS when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.BIAS { PARAM_VALUE.BIAS } { + # Procedure called to validate BIAS + return true +} + +proc update_PARAM_VALUE.C { PARAM_VALUE.C } { + # Procedure called to update C when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.C { PARAM_VALUE.C } { + # Procedure called to validate C + return true +} + +proc 
update_PARAM_VALUE.FPARG { PARAM_VALUE.FPARG } { + # Procedure called to update FPARG when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.FPARG { PARAM_VALUE.FPARG } { + # Procedure called to validate FPARG + return true +} + +proc update_PARAM_VALUE.K { PARAM_VALUE.K } { + # Procedure called to update K when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.K { PARAM_VALUE.K } { + # Procedure called to validate K + return true +} + +proc update_PARAM_VALUE.N { PARAM_VALUE.N } { + # Procedure called to update N when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.N { PARAM_VALUE.N } { + # Procedure called to validate N + return true +} + +proc update_PARAM_VALUE.PE { PARAM_VALUE.PE } { + # Procedure called to update PE when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.PE { PARAM_VALUE.PE } { + # Procedure called to validate PE + return true +} + +proc update_PARAM_VALUE.SIGNED { PARAM_VALUE.SIGNED } { + # Procedure called to update SIGNED when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.SIGNED { PARAM_VALUE.SIGNED } { + # Procedure called to validate SIGNED + return true +} + + +proc update_MODELPARAM_VALUE.N { MODELPARAM_VALUE.N PARAM_VALUE.N } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.N}] ${MODELPARAM_VALUE.N} +} + +proc update_MODELPARAM_VALUE.K { MODELPARAM_VALUE.K PARAM_VALUE.K } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.K}] ${MODELPARAM_VALUE.K} +} + +proc update_MODELPARAM_VALUE.C { MODELPARAM_VALUE.C PARAM_VALUE.C } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value 
[get_property value ${PARAM_VALUE.C}] ${MODELPARAM_VALUE.C} +} + +proc update_MODELPARAM_VALUE.PE { MODELPARAM_VALUE.PE PARAM_VALUE.PE } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.PE}] ${MODELPARAM_VALUE.PE} +} + +proc update_MODELPARAM_VALUE.SIGNED { MODELPARAM_VALUE.SIGNED PARAM_VALUE.SIGNED } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.SIGNED}] ${MODELPARAM_VALUE.SIGNED} +} + +proc update_MODELPARAM_VALUE.FPARG { MODELPARAM_VALUE.FPARG PARAM_VALUE.FPARG } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.FPARG}] ${MODELPARAM_VALUE.FPARG} +} + +proc update_MODELPARAM_VALUE.BIAS { MODELPARAM_VALUE.BIAS PARAM_VALUE.BIAS } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.BIAS}] ${MODELPARAM_VALUE.BIAS} +} + +proc update_MODELPARAM_VALUE.CF { MODELPARAM_VALUE.CF PARAM_VALUE.CF } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.CF}] ${MODELPARAM_VALUE.CF} +} + +proc update_MODELPARAM_VALUE.ADDR_BITS { MODELPARAM_VALUE.ADDR_BITS PARAM_VALUE.ADDR_BITS } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.ADDR_BITS}] ${MODELPARAM_VALUE.ADDR_BITS} +} + +proc update_MODELPARAM_VALUE.O_BITS { MODELPARAM_VALUE.O_BITS PARAM_VALUE.O_BITS } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.O_BITS}] ${MODELPARAM_VALUE.O_BITS} +} From 
bc5b73868d90a8fa8e9a5d59233529331d9f4369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Wed, 15 Nov 2023 14:47:07 +0000 Subject: [PATCH 098/111] Allow for custom start-up initialization of thresholds. --- finn-rtllib/thresholding/hdl/thresholding.sv | 28 +++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index bfd7e5d8ff..56038061c2 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -29,7 +29,7 @@ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @brief Pipelined thresholding by binary search. - * @author Thomas B. Preußer + * @author Thomas B. Preußer * * @description * Produces the N-bit count of those among 2^N-1 thresholds that are not @@ -42,6 +42,14 @@ * with respect to a selectable set of thresholds. The corresponding * threshold configuration relies on a channel address prefix. Inputs are * accompanied by a channel selector. + * + * Parameter Layout as seen on AXI-Lite (row by row): + * | Base \ Offs | 0 1 2 ... N-2 N-1 + * ---------+------------------------------+---------------------------------- + * Chnl #0 | 0 | T_0 T_1 T_2 ... T_{N-2} 'x + * Chnl #1 | N | T_0 T_1 T_2 ... T_{N-2} 'x + * Chnl #c | ((c/PE)*$clog2(PE) + c%PE)*N | T_0 T_1 T_2 ... T_{N-2} 'x + * *****************************************************************************/ module thresholding #( int unsigned N, // output precision @@ -53,6 +61,9 @@ module thresholding #( bit FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa int BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] + // Initial Thresholds (per channel) + logic [K-1:0] THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } }, + localparam int unsigned CF = C/PE, // Channel fold localparam int unsigned O_BITS = BIAS >= 0?
/* unsigned */ $clog2(2**N+BIAS) : @@ -136,7 +147,6 @@ module thresholding #( end end - uwire ptr_t iptr; assign iptr[0+:N] = cfg_a[0+:N]; if(CF > 1) begin @@ -180,16 +190,26 @@ module thresholding #( uwire cs = (p.ptr[SN:0] == 2**SN-1); // Threshold Memory - logic [K-1:0] Thresh = 'x; // Read-out register + val_t Thresh; // Read-out register if(1) begin : blkThreshMem uwire we = (p.op ==? WR) && cs; if((CF == 1) && (stage == 0)) begin + initial begin + Thresh = THRESHOLDS[pe][2**SN-1]; + end always_ff @(posedge clk) begin if(we) Thresh <= p.val; end end else begin - logic [K-1:0] Threshs[CF * 2**stage]; + val_t Threshs[CF * 2**stage]; + initial begin + for(int unsigned c = 0; c < CF; c++) begin + for(int unsigned i = 0; i < 2**stage; i++) begin + Threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1]; + end + end + end uwire [$clog2(CF)+stage-1:0] addr = p.ptr[$clog2(CF)+N-1:SN+1]; always_ff @(posedge clk) begin if(we) Threshs[addr] <= p.val; From 730bcf83e69a39e881d6b468b69001332bd78b0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Wed, 15 Nov 2023 14:49:55 +0000 Subject: [PATCH 099/111] Make AXI-Lite threshold read/write interface optional. 
--- .../thresholding/hdl/thresholding_axi.sv | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 53066901fb..edfbaf891c 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -48,6 +48,11 @@ module thresholding_axi #( bit FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa int BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] + // Initial Thresholds (per channel) + logic [K-1:0] THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } }, + + bit HAVE_AXILITE = 1, // Activate AXI-Lite for threshold read/write + localparam int unsigned CF = C/PE, // Channel Fold localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, localparam int unsigned O_BITS = BIAS >= 0? @@ -102,19 +107,28 @@ module thresholding_axi #( uwire [K -1:0] cfg_d; uwire cfg_rack; uwire [K -1:0] cfg_q; - axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi ( - .aclk(ap_clk), .aresetn(ap_rst_n), - - .awready(s_axilite_AWREADY), .awvalid(s_axilite_AWVALID), .awaddr(s_axilite_AWADDR), .awprot('x), - .wready(s_axilite_WREADY), .wvalid(s_axilite_WVALID), .wdata(s_axilite_WDATA), .wstrb(s_axilite_WSTRB), - .bready(s_axilite_BREADY), .bvalid(s_axilite_BVALID), .bresp(s_axilite_BRESP), - .arready(s_axilite_ARREADY), .arvalid(s_axilite_ARVALID), .araddr(s_axilite_ARADDR), .arprot('x), - .rready(s_axilite_RREADY), .rvalid(s_axilite_RVALID), .rresp(s_axilite_RRESP), .rdata(s_axilite_RDATA), - - .ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a), .ip_wdata(cfg_d), - .ip_rack(cfg_rack), .ip_rdata(cfg_q) - ); + if(HAVE_AXILITE) begin + axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi ( + .aclk(ap_clk), .aresetn(ap_rst_n), + + .awready(s_axilite_AWREADY), .awvalid(s_axilite_AWVALID), .awaddr(s_axilite_AWADDR), 
.awprot('x), + .wready(s_axilite_WREADY), .wvalid(s_axilite_WVALID), .wdata(s_axilite_WDATA), .wstrb(s_axilite_WSTRB), + .bready(s_axilite_BREADY), .bvalid(s_axilite_BVALID), .bresp(s_axilite_BRESP), + + .arready(s_axilite_ARREADY), .arvalid(s_axilite_ARVALID), .araddr(s_axilite_ARADDR), .arprot('x), + .rready(s_axilite_RREADY), .rvalid(s_axilite_RVALID), .rresp(s_axilite_RRESP), .rdata(s_axilite_RDATA), + + .ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a), .ip_wdata(cfg_d), + .ip_rack(cfg_rack), .ip_rdata(cfg_q) + ); + end + else begin + assign cfg_en = 0; + assign cfg_we = 'x; + assign cfg_a = 'x; + assign cfg_d = 'x; + end //----------------------------------------------------------------------- // Kernel Implementation From 95d6a3eca503e388066558989022aebe951a0ce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Thu, 16 Nov 2023 09:18:20 +0000 Subject: [PATCH 100/111] Double wrapping for both structured SystemVerilog parameters and an IPI-compatible Verilog top-level.
--- .../thresholding/hdl/thresholding_axi.sv | 10 +- .../hdl/thresholding_axi_tpl_inner.sv | 116 ++++++++++++++++++ ...wrapper.v => thresholding_axi_tpl_outer.v} | 30 ++--- 3 files changed, 139 insertions(+), 17 deletions(-) create mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv rename finn-rtllib/thresholding/hdl/{thresholding_axi_wrapper.v => thresholding_axi_tpl_outer.v} (75%) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index edfbaf891c..20bdff6d25 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -51,7 +51,7 @@ module thresholding_axi #( // Initial Thresholds (per channel) logic [K-1:0] THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } }, - bit HAVE_AXILITE = 1, // Activate AXI-Lite for threshold read/write + bit USE_AXILITE, // Implement AXI-Lite for threshold read/write localparam int unsigned CF = C/PE, // Channel Fold localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, @@ -108,7 +108,7 @@ module thresholding_axi #( uwire cfg_rack; uwire [K -1:0] cfg_q; - if(HAVE_AXILITE) begin + if(USE_AXILITE) begin axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi ( .aclk(ap_clk), .aresetn(ap_rst_n), @@ -132,7 +132,11 @@ module thresholding_axi #( //----------------------------------------------------------------------- // Kernel Implementation - thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS)) impl ( + thresholding #( + .N(N), .K(K), .C(C), .PE(PE), + .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS), + .THRESHOLDS(THRESHOLDS) + ) impl ( .clk(ap_clk), .rst(!ap_rst_n), .cfg_en, .cfg_we, .cfg_a, .cfg_d, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv new file mode 100644 index 0000000000..4c28e391c8 --- /dev/null +++ 
b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv @@ -0,0 +1,116 @@ +/** + * Copyright (c) 2023, Xilinx + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of FINN nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @author Thomas B. Preußer + * @brief Verilog wrapper for IP packaging. 
+ */ + +module thresholding_axi_tpl_inner #( + int unsigned N, // output precision + int unsigned K, // input/threshold precision + int unsigned C, // Channels + int unsigned PE, // Processing Parallelism, requires C = k*PE + + int unsigned SIGNED, // signed inputs + int unsigned FPARG, // floating-point inputs: [sign] | exponent | mantissa + int unsigned BIAS, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] + + logic [K-1:0] THRESHOLDS[C][2**N-1] = '{ + '{ 'hC0, 'hC1, 'hC2, 'hC3, 'hC4, 'hC5, 'hC6, 'hC7, 'hC8, 'hC9, 'hCa, 'hCb, 'hCc, 'hCd, 'hCe }, + '{ 'hD0, 'hD1, 'hD2, 'hD3, 'hD4, 'hD5, 'hD6, 'hD7, 'hD8, 'hD9, 'hDa, 'hDb, 'hDc, 'hDd, 'hDe }, + '{ 'hE0, 'hE1, 'hE2, 'hE3, 'hE4, 'hE5, 'hE6, 'hE7, 'hE8, 'hE9, 'hEa, 'hEb, 'hEc, 'hEd, 'hEe }, + '{ 'hF0, 'hF1, 'hF2, 'hF3, 'hF4, 'hF5, 'hF6, 'hF7, 'hF8, 'hF9, 'hFa, 'hFb, 'hFc, 'hFd, 'hFe } + }, + bit USE_AXILITE, // Implement AXI-Lite for threshold read/write + + localparam int unsigned CF = C/PE, // Channel Fold + localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, + localparam int unsigned O_BITS = $clog2(2**N+BIAS) +)( + // Global Control + input ap_clk, + input ap_rst_n, + + //- AXI Lite ------------------------ + // Writing + input s_axilite_AWVALID, + output s_axilite_AWREADY, + input [ADDR_BITS-1:0] s_axilite_AWADDR, // lowest 2 bits (byte selectors) are ignored + + input s_axilite_WVALID, + output s_axilite_WREADY, + input [31:0] s_axilite_WDATA, + input [ 3:0] s_axilite_WSTRB, + + output s_axilite_BVALID, + input s_axilite_BREADY, + output [1:0] s_axilite_BRESP, + + // Reading + input s_axilite_ARVALID, + output s_axilite_ARREADY, + input [ADDR_BITS-1:0] s_axilite_ARADDR, + + output s_axilite_RVALID, + input s_axilite_RREADY, + output [31:0] s_axilite_RDATA, + output [ 1:0] s_axilite_RRESP, + + //- AXI Stream - Input -------------- + output s_axis_tready, + input s_axis_tvalid, + input [((PE*K+7)/8)*8-1:0] s_axis_tdata, + + //- AXI Stream - Output ------------- + input m_axis_tready, + 
output m_axis_tvalid, + output [((PE*O_BITS+7)/8)*8-1:0] m_axis_tdata +); + + thresholding_axi #( + .N(N), .K(K), .C(C), .PE(PE), + .SIGNED(SIGNED), + .FPARG(FPARG), + .BIAS(BIAS), + .THRESHOLDS(THRESHOLDS), + .USE_AXILITE(USE_AXILITE) + ) core ( + .ap_clk, .ap_rst_n, + + .s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR, + .s_axilite_WVALID, .s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB, + .s_axilite_BVALID, .s_axilite_BREADY, .s_axilite_BRESP, + + .s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR, + .s_axilite_RVALID, .s_axilite_RREADY, .s_axilite_RDATA, .s_axilite_RRESP, + .s_axis_tready, .s_axis_tvalid, .s_axis_tdata, + .m_axis_tready, .m_axis_tvalid, .m_axis_tdata + ); + +endmodule : thresholding_axi_tpl_inner diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v similarity index 75% rename from finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v rename to finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v index 14c2c13bfd..5dfe58287d 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_wrapper.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v @@ -31,16 +31,18 @@ * @brief Verilog wrapper for IP packaging. 
*/ -module thresholding_axi_wrapper #( +module thresholding_axi_tpl_outer #( parameter N = 4, // output precision parameter K = 16, // input/threshold precision - parameter C = 1, // Channels + parameter C = 4, // Channels parameter PE = 1, // Processing Parallelism, requires C = k*PE parameter SIGNED = 1, // signed inputs parameter FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa parameter BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] + parameter USE_AXILITE = 0, // Implement AXI-Lite for threshold read/write + parameter CF = C/PE, // Channel Fold parameter ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, parameter O_BITS = $clog2(2**N+BIAS) @@ -88,23 +90,23 @@ module thresholding_axi_wrapper #( output [((PE*O_BITS+7)/8)*8-1:0] m_axis_tdata ); - thresholding_axi #( + thresholding_axi_tpl_inner #( .N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), - .BIAS(BIAS) + .BIAS(BIAS), + .USE_AXILITE(USE_AXILITE) ) core ( - .ap_clk, .ap_rst_n, - - .s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR, - .s_axilite_WVALID, .s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB, - .s_axilite_BVALID, .s_axilite_BREADY, .s_axilite_BRESP, + .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), - .s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR, - .s_axilite_RVALID, .s_axilite_RREADY, .s_axilite_RDATA, .s_axilite_RRESP, + .s_axilite_AWVALID(s_axilite_AWVALID), .s_axilite_AWREADY(s_axilite_AWREADY), .s_axilite_AWADDR(s_axilite_AWADDR), + .s_axilite_WVALID(s_axilite_WVALID), .s_axilite_WREADY(s_axilite_WREADY), .s_axilite_WDATA(s_axilite_WDATA), .s_axilite_WSTRB(s_axilite_WSTRB), + .s_axilite_BVALID(s_axilite_BVALID), .s_axilite_BREADY(s_axilite_BREADY), .s_axilite_BRESP(s_axilite_BRESP), - .s_axis_tready, .s_axis_tvalid, .s_axis_tdata, - .m_axis_tready, .m_axis_tvalid, .m_axis_tdata + .s_axilite_ARVALID(s_axilite_ARVALID), .s_axilite_ARREADY(s_axilite_ARREADY), .s_axilite_ARADDR(s_axilite_ARADDR), + .s_axilite_RVALID(s_axilite_RVALID), 
.s_axilite_RREADY(s_axilite_RREADY), .s_axilite_RDATA(s_axilite_RDATA), .s_axilite_RRESP(s_axilite_RRESP), + .s_axis_tready(s_axis_tready), .s_axis_tvalid(s_axis_tvalid), .s_axis_tdata(s_axis_tdata), + .m_axis_tready(m_axis_tready), .m_axis_tvalid(m_axis_tvalid), .m_axis_tdata(m_axis_tdata) ); -endmodule : thresholding_axi_wrapper +endmodule // thresholding_axi_tpl_outer From 8003c9145d681fa5cf39720ad280d469533dbca1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Thu, 16 Nov 2023 09:29:22 +0000 Subject: [PATCH 101/111] Templating the wrapper layers for specialization by FINN compiler. --- .../hdl/thresholding_axi_tpl_inner.sv | 7 +--- .../hdl/thresholding_axi_tpl_outer.v | 32 +++++++++---------- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv index 4c28e391c8..f52d8d6a31 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv @@ -41,12 +41,7 @@ module thresholding_axi_tpl_inner #( int unsigned FPARG, // floating-point inputs: [sign] | exponent | mantissa int unsigned BIAS, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] - logic [K-1:0] THRESHOLDS[C][2**N-1] = '{ - '{ 'hC0, 'hC1, 'hC2, 'hC3, 'hC4, 'hC5, 'hC6, 'hC7, 'hC8, 'hC9, 'hCa, 'hCb, 'hCc, 'hCd, 'hCe }, - '{ 'hD0, 'hD1, 'hD2, 'hD3, 'hD4, 'hD5, 'hD6, 'hD7, 'hD8, 'hD9, 'hDa, 'hDb, 'hDc, 'hDd, 'hDe }, - '{ 'hE0, 'hE1, 'hE2, 'hE3, 'hE4, 'hE5, 'hE6, 'hE7, 'hE8, 'hE9, 'hEa, 'hEb, 'hEc, 'hEd, 'hEe }, - '{ 'hF0, 'hF1, 'hF2, 'hF3, 'hF4, 'hF5, 'hF6, 'hF7, 'hF8, 'hF9, 'hFa, 'hFb, 'hFc, 'hFd, 'hFe } - }, + logic [K-1:0] THRESHOLDS[C][2**N-1] = $THRESHOLDS$, bit USE_AXILITE, // Implement AXI-Lite for threshold read/write localparam int unsigned CF = C/PE, // Channel Fold diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v 
b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v index 5dfe58287d..3521987b66 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v @@ -32,20 +32,18 @@ */ module thresholding_axi_tpl_outer #( - parameter N = 4, // output precision - parameter K = 16, // input/threshold precision - parameter C = 4, // Channels - parameter PE = 1, // Processing Parallelism, requires C = k*PE + parameter N = $N$, // output precision + parameter K = $M$, // input/threshold precision + parameter C = $C$, // Channels + parameter PE = $PE$, // Processing Parallelism, requires C = k*PE - parameter SIGNED = 1, // signed inputs - parameter FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa - parameter BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] + parameter SIGNED = $SIGNED$, // signed inputs + parameter FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa + parameter BIAS = $BIAS$, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] - parameter USE_AXILITE = 0, // Implement AXI-Lite for threshold read/write + parameter USE_AXILITE = $USE_AXILITE$, // Implement AXI-Lite for threshold read/write - parameter CF = C/PE, // Channel Fold - parameter ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, - parameter O_BITS = $clog2(2**N+BIAS) + parameter O_BITS = $O_BITS$ )( // Global Control (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF s_axilite:s_axis:m_axis, ASSOCIATED_RESET ap_rst_n" *) @@ -56,9 +54,9 @@ module thresholding_axi_tpl_outer #( //- AXI Lite ------------------------ // Writing - input s_axilite_AWVALID, - output s_axilite_AWREADY, - input [ADDR_BITS-1:0] s_axilite_AWADDR, // lowest 2 bits (byte selectors) are ignored + input s_axilite_AWVALID, + output s_axilite_AWREADY, + input [$clog2(C/PE) + $clog2(PE) + N + 1:0] s_axilite_AWADDR, // lowest 2 bits (byte selectors) are ignored input s_axilite_WVALID, output s_axilite_WREADY, @@ -70,9 +68,9 
@@ module thresholding_axi_tpl_outer #( output [1:0] s_axilite_BRESP, // Reading - input s_axilite_ARVALID, - output s_axilite_ARREADY, - input [ADDR_BITS-1:0] s_axilite_ARADDR, + input s_axilite_ARVALID, + output s_axilite_ARREADY, + input [$clog2(C/PE) + $clog2(PE) + N + 1:0] s_axilite_ARADDR, output s_axilite_RVALID, input s_axilite_RREADY, From d9db2574f73b1fc7e88c3fe46d4d2d853b71f5b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Mon, 20 Nov 2023 08:06:10 +0000 Subject: [PATCH 102/111] Replicate correct O_BITS computation for negative BIASes in inner wrapper. --- finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv index f52d8d6a31..34a2d46706 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv @@ -46,7 +46,9 @@ module thresholding_axi_tpl_inner #( localparam int unsigned CF = C/PE, // Channel Fold localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, - localparam int unsigned O_BITS = $clog2(2**N+BIAS) + localparam int unsigned O_BITS = BIAS >= 0? + /* unsigned */ $clog2(2**N+BIAS) : + /* signed */ 1+$clog2(-BIAS >= 2**(N-1)? -BIAS : 2**N+BIAS) )( // Global Control input ap_clk, From 073844ac2de17d84d548e0b71527d71861f67c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Mon, 20 Nov 2023 18:40:10 +0000 Subject: [PATCH 103/111] Correcting wrong unsigned interpretation of BIAS in inner wrapper. 
--- finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv index 34a2d46706..ddda5a88ed 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv @@ -39,7 +39,7 @@ module thresholding_axi_tpl_inner #( int unsigned SIGNED, // signed inputs int unsigned FPARG, // floating-point inputs: [sign] | exponent | mantissa - int unsigned BIAS, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] + int BIAS, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] logic [K-1:0] THRESHOLDS[C][2**N-1] = $THRESHOLDS$, bit USE_AXILITE, // Implement AXI-Lite for threshold read/write From 98184ac959b9a901bba07ee6d9f2252c0cb51ba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Mon, 20 Nov 2023 19:01:59 +0000 Subject: [PATCH 104/111] Working around an LRM ambiguity when also having assignments in initial blocks. 
--- finn-rtllib/thresholding/hdl/thresholding.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 56038061c2..6ecccbe7b6 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -197,7 +197,7 @@ module thresholding #( initial begin Thresh = THRESHOLDS[pe][2**SN-1]; end - always_ff @(posedge clk) begin + always @(posedge clk) begin if(we) Thresh <= p.val; end end @@ -211,7 +211,7 @@ module thresholding #( end end uwire [$clog2(CF)+stage-1:0] addr = p.ptr[$clog2(CF)+N-1:SN+1]; - always_ff @(posedge clk) begin + always @(posedge clk) begin if(we) Threshs[addr] <= p.val; Thresh <= Threshs[addr]; end From 528184d0b34f4b64a21aa92570e8768ed0ec4507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 21 Nov 2023 08:01:55 +0000 Subject: [PATCH 105/111] Attempt to mitigate long elaboration times by more explicit configuration masking. --- finn-rtllib/thresholding/hdl/thresholding.sv | 66 +++++++++++------ .../thresholding/hdl/thresholding_axi.sv | 2 +- .../thresholding/sim/thresholding_axi_tb.sv | 72 ++++++++++--------- .../thresholding/sim/thresholding_tb.sv | 2 +- 4 files changed, 84 insertions(+), 58 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 6ecccbe7b6..c56e2a994e 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -63,6 +63,7 @@ module thresholding #( // Initial Thresholds (per channel) logic [K-1:0] THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } }, + bit USE_CONFIG = 1, localparam int unsigned CF = C/PE, // Channel fold localparam int unsigned O_BITS = BIAS >= 0? 
@@ -132,7 +133,7 @@ module thresholding #( always_ff @(posedge clk) begin if(rst) GuardSem <= N+2; else begin - automatic logic dec = !cfg_en && !th_full && ivld; + automatic logic dec = !(USE_CONFIG && cfg_en) && !th_full && ivld; automatic logic inc = ovld && ordy; GuardSem <= GuardSem + (inc == dec? 0 : inc? 1 : -1); end @@ -143,7 +144,7 @@ module thresholding #( if(PE == 1) assign cfg_sel[0] = 1; else begin for(genvar pe = 0; pe < PE; pe++) begin - assign cfg_sel[pe] = cfg_en && (cfg_a[N+:$clog2(PE)] == pe); + assign cfg_sel[pe] = USE_CONFIG && cfg_en && (cfg_a[N+:$clog2(PE)] == pe); end end @@ -158,26 +159,26 @@ module thresholding #( CnlCnt <= 0; CnlLst <= 0; end - else if(!cfg_en && !th_full && ivld) begin + else if(!(USE_CONFIG && cfg_en) && !th_full && ivld) begin CnlCnt <= CnlCnt + (CnlLst? 1-CF : 1); CnlLst <= CnlCnt == CF-2; end end - assign iptr[N+:$clog2(CF)] = cfg_en? cfg_a[N+$clog2(PE)+:$clog2(CF)] : CnlCnt; + assign iptr[N+:$clog2(CF)] = USE_CONFIG && cfg_en? cfg_a[N+$clog2(PE)+:$clog2(CF)] : CnlCnt; end for(genvar pe = 0; pe < PE; pe++) begin assign pipe[pe][0] = '{ - op: cfg_en? + op: USE_CONFIG && cfg_en? (!cfg_sel[pe]? NOP : cfg_we? WR : RB) : (ivld && !th_full? TH : NOP), ptr: iptr, - val: !cfg_en? idat[pe] : cfg_we? cfg_d : 0 + val: !(USE_CONFIG && cfg_en)? idat[pe] : cfg_we? cfg_d : 0 }; end - assign irdy = !cfg_en && !th_full; + assign irdy = !(USE_CONFIG && cfg_en) && !th_full; end : blkFeed //----------------------------------------------------------------------- @@ -191,17 +192,10 @@ module thresholding #( // Threshold Memory val_t Thresh; // Read-out register - if(1) begin : blkThreshMem - uwire we = (p.op ==? 
WR) && cs; - if((CF == 1) && (stage == 0)) begin - initial begin - Thresh = THRESHOLDS[pe][2**SN-1]; - end - always @(posedge clk) begin - if(we) Thresh <= p.val; - end - end - else begin + if(1) begin : blkThresh + + uwire val_t threshs[CF * 2**stage]; + if(USE_CONFIG) begin : genThreshMem val_t Threshs[CF * 2**stage]; initial begin for(int unsigned c = 0; c < CF; c++) begin @@ -210,13 +204,41 @@ module thresholding #( end end end + + uwire we = (p.op ==? WR) && cs; + if((CF == 1) && (stage == 0)) begin + always @(posedge clk) begin + if(we) Threshs[0] <= p.val; + end + end + else begin + uwire [$clog2(CF)+stage-1:0] addr = p.ptr[$clog2(CF)+N-1:SN+1]; + always @(posedge clk) begin + if(we) Threshs[addr] <= p.val; + end + end + + assign threshs = Threshs; + end : genThreshMem + else begin : genThreshCst + for(genvar c = 0; c < CF; c++) begin + for(genvar i = 0; i < 2**stage; i++) begin + assign threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1]; + end + end + end : genThreshCst + + if((CF == 1) && (stage == 0)) begin + assign Thresh = threshs[0]; + end + else begin uwire [$clog2(CF)+stage-1:0] addr = p.ptr[$clog2(CF)+N-1:SN+1]; - always @(posedge clk) begin - if(we) Threshs[addr] <= p.val; - Thresh <= Threshs[addr]; + always_ff @(posedge clk) begin + Thresh <= threshs[addr]; end end - end : blkThreshMem + + end : blkThresh // Pipeline State pipe_t P = '{ op: NOP, default: 'x }; diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 20bdff6d25..67c2213dfb 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -135,7 +135,7 @@ module thresholding_axi #( thresholding #( .N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS), - .THRESHOLDS(THRESHOLDS) + .THRESHOLDS(THRESHOLDS), .USE_CONFIG(USE_AXILITE) ) impl ( .clk(ap_clk), .rst(!ap_rst_n), diff --git 
a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv index 200d4d5999..926c318adc 100644 --- a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv +++ b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv @@ -57,6 +57,7 @@ module thresholding_axi_tb #( localparam int unsigned C_BITS = C < 2? 1 : $clog2(C); localparam int unsigned MST_STRM_WROUNDS = 503; + localparam bit DYNAMIC_CONFIG = 0; typedef int unsigned threshs_t[C][2**N-1]; function threshs_t init_thresholds(); @@ -110,7 +111,8 @@ module thresholding_axi_tb #( uwire ovld; uwire [PE-1:0][N-1:0] odat; - thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0)) dut ( + localparam threshs_t THRESHS_STATIC = DYNAMIC_CONFIG? '{ default: '{ default: 'x } } : THRESHS; + thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0), .THRESHOLDS(THRESHS_STATIC), .USE_AXILITE(1)) dut ( .ap_clk(clk), .ap_rst_n(!rst), // Configuration @@ -158,42 +160,44 @@ module thresholding_axi_tb #( @(posedge clk iff !rst); - // Threshold Configuratin - for(int unsigned c = 0; c < C; c+=PE) begin - automatic addr_t addr = 0; - if(CF > 1) addr[N+$clog2(PE)+:$clog2(CF)] = c/PE; - for(int unsigned pe = 0; pe < PE; pe++) begin - if(PE > 1) addr[N+:$clog2(PE)] = pe; - for(int unsigned t = 0; t < 2**N-1; t++) begin - addr[0+:N] = t; - fork - begin - s_axilite_AWVALID <= 1; - s_axilite_AWADDR <= { addr, 2'b00 }; - @(posedge clk iff s_axilite_AWREADY); - s_axilite_AWVALID <= 0; - s_axilite_AWADDR <= 'x; - end - begin - s_axilite_WVALID <= 1; - s_axilite_WDATA <= THRESHS[c+pe][t]; - @(posedge clk iff s_axilite_WREADY); - s_axilite_WVALID <= 0; - s_axilite_WDATA <= 'x; - end - begin - s_axilite_BREADY <= 1; - @(posedge clk iff s_axilite_BVALID); - assert(s_axilite_BRESP == '0) else begin - $error("Error on parameter write."); - $stop; + // Threshold Configuration + if(DYNAMIC_CONFIG) begin : blkConfig + for(int unsigned c = 0; c < C; c+=PE) begin + automatic addr_t addr = 0; + if(CF > 1) 
addr[N+$clog2(PE)+:$clog2(CF)] = c/PE; + for(int unsigned pe = 0; pe < PE; pe++) begin + if(PE > 1) addr[N+:$clog2(PE)] = pe; + for(int unsigned t = 0; t < 2**N-1; t++) begin + addr[0+:N] = t; + fork + begin + s_axilite_AWVALID <= 1; + s_axilite_AWADDR <= { addr, 2'b00 }; + @(posedge clk iff s_axilite_AWREADY); + s_axilite_AWVALID <= 0; + s_axilite_AWADDR <= 'x; end - s_axilite_BREADY <= 0; - end - join + begin + s_axilite_WVALID <= 1; + s_axilite_WDATA <= THRESHS[c+pe][t]; + @(posedge clk iff s_axilite_WREADY); + s_axilite_WVALID <= 0; + s_axilite_WDATA <= 'x; + end + begin + s_axilite_BREADY <= 1; + @(posedge clk iff s_axilite_BVALID); + assert(s_axilite_BRESP == '0) else begin + $error("Error on parameter write."); + $stop; + end + s_axilite_BREADY <= 0; + end + join + end end end - end + end : blkConfig fork // Intermittent configuration readback diff --git a/finn-rtllib/thresholding/sim/thresholding_tb.sv b/finn-rtllib/thresholding/sim/thresholding_tb.sv index 90dfba1022..20f3879422 100644 --- a/finn-rtllib/thresholding/sim/thresholding_tb.sv +++ b/finn-rtllib/thresholding/sim/thresholding_tb.sv @@ -85,7 +85,7 @@ module thresholding_tb #( uwire ovld; uwire [PE-1:0][N-1:0] odat; - thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG)) dut ( + thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .USE_CONFIG(1)) dut ( .clk, .rst, // Configuration From 8fb250c9f6e340227608b0b455d6d3ce66376c4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Thu, 23 Nov 2023 18:45:51 +0000 Subject: [PATCH 106/111] Switching to threshold initialization from hex data files. 
--- finn-rtllib/thresholding/hdl/thresholding.sv | 33 +++------ .../thresholding/hdl/thresholding_axi.sv | 18 +++-- .../hdl/thresholding_axi_tpl_inner.sv | 20 +++--- .../hdl/thresholding_axi_tpl_outer.v | 2 + finn-rtllib/thresholding/sim/thresh_gen.sv | 45 ++++++++++++ .../thresholding/sim/thresholding_axi_tb.sv | 70 +++++++++---------- 6 files changed, 113 insertions(+), 75 deletions(-) create mode 100644 finn-rtllib/thresholding/sim/thresh_gen.sv diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index c56e2a994e..ff3d4172ab 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -61,8 +61,8 @@ module thresholding #( bit FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa int BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] - // Initial Thresholds (per channel) - logic [K-1:0] THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } }, + // Initial Thresholds + parameter THRESHOLDS_PATH = "", bit USE_CONFIG = 1, localparam int unsigned CF = C/PE, // Channel fold @@ -194,17 +194,13 @@ module thresholding #( val_t Thresh; // Read-out register if(1) begin : blkThresh - uwire val_t threshs[CF * 2**stage]; - if(USE_CONFIG) begin : genThreshMem - val_t Threshs[CF * 2**stage]; - initial begin - for(int unsigned c = 0; c < CF; c++) begin - for(int unsigned i = 0; i < 2**stage; i++) begin - Threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1]; - end - end - end + val_t Threshs[CF * 2**stage]; + if(THRESHOLDS_PATH != "") begin + localparam FILE = $sformatf("%s/threshs_%0d_%0d.dat", THRESHOLDS_PATH, pe, stage); + initial $readmemh(FILE, Threshs); + end + if(USE_CONFIG) begin : genThreshMem uwire we = (p.op ==? 
WR) && cs; if((CF == 1) && (stage == 0)) begin always @(posedge clk) begin @@ -217,24 +213,15 @@ module thresholding #( if(we) Threshs[addr] <= p.val; end end - - assign threshs = Threshs; end : genThreshMem - else begin : genThreshCst - for(genvar c = 0; c < CF; c++) begin - for(genvar i = 0; i < 2**stage; i++) begin - assign threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1]; - end - end - end : genThreshCst if((CF == 1) && (stage == 0)) begin - assign Thresh = threshs[0]; + assign Thresh = Threshs[0]; end else begin uwire [$clog2(CF)+stage-1:0] addr = p.ptr[$clog2(CF)+N-1:SN+1]; always_ff @(posedge clk) begin - Thresh <= threshs[addr]; + Thresh <= Threshs[addr]; end end diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 67c2213dfb..69617a20d9 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -48,8 +48,8 @@ module thresholding_axi #( bit FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa int BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] - // Initial Thresholds (per channel) - logic [K-1:0] THRESHOLDS[C][2**N-1] = '{ default: '{ default: '0 } }, + // Initial Thresholds + parameter THRESHOLDS_PATH = "", bit USE_AXILITE, // Implement AXI-Lite for threshold read/write @@ -103,12 +103,13 @@ module thresholding_axi #( // AXI-lite Configuration Interface uwire cfg_en; uwire cfg_we; - uwire [ADDR_BITS-1:0] cfg_a; + uwire [ADDR_BITS-3:0] cfg_a; uwire [K -1:0] cfg_d; uwire cfg_rack; uwire [K -1:0] cfg_q; if(USE_AXILITE) begin + uwire [ADDR_BITS-1:0] cfg_a0; axi4lite_if #(.ADDR_WIDTH(ADDR_BITS), .DATA_WIDTH(32), .IP_DATA_WIDTH(K)) axi ( .aclk(ap_clk), .aresetn(ap_rst_n), @@ -119,9 +120,16 @@ module thresholding_axi #( .arready(s_axilite_ARREADY), .arvalid(s_axilite_ARVALID), .araddr(s_axilite_ARADDR), .arprot('x), .rready(s_axilite_RREADY), .rvalid(s_axilite_RVALID), 
.rresp(s_axilite_RRESP), .rdata(s_axilite_RDATA), - .ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a), .ip_wdata(cfg_d), + .ip_en(cfg_en), .ip_wen(cfg_we), .ip_addr(cfg_a0), .ip_wdata(cfg_d), .ip_rack(cfg_rack), .ip_rdata(cfg_q) ); + assign cfg_a = cfg_a0[ADDR_BITS-3:0]; + always_ff @(posedge ap_clk) begin + assert(!ap_rst_n || !cfg_en || (cfg_a0[ADDR_BITS-2+:2] === 3'h0)) else begin + $error("%m: Spurious high address bits."); + $stop; + end + end end else begin assign cfg_en = 0; @@ -135,7 +143,7 @@ module thresholding_axi #( thresholding #( .N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS), - .THRESHOLDS(THRESHOLDS), .USE_CONFIG(USE_AXILITE) + .THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_CONFIG(USE_AXILITE) ) impl ( .clk(ap_clk), .rst(!ap_rst_n), diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv index ddda5a88ed..b1350a9f31 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv @@ -32,17 +32,17 @@ */ module thresholding_axi_tpl_inner #( - int unsigned N, // output precision - int unsigned K, // input/threshold precision - int unsigned C, // Channels - int unsigned PE, // Processing Parallelism, requires C = k*PE + int unsigned N = 4, // output precision + int unsigned K = 9, // input/threshold precision + int unsigned C = 6, // Channels + int unsigned PE = 2, // Processing Parallelism, requires C = k*PE - int unsigned SIGNED, // signed inputs - int unsigned FPARG, // floating-point inputs: [sign] | exponent | mantissa - int BIAS, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] + int unsigned SIGNED = 1, // signed inputs + int unsigned FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa + int BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] - logic [K-1:0] THRESHOLDS[C][2**N-1] = $THRESHOLDS$, - bit USE_AXILITE, // Implement AXI-Lite for 
threshold read/write + parameter THRESHOLDS_PATH = "../../../data", + bit USE_AXILITE = 1, // Implement AXI-Lite for threshold read/write localparam int unsigned CF = C/PE, // Channel Fold localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, @@ -95,7 +95,7 @@ module thresholding_axi_tpl_inner #( .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS), - .THRESHOLDS(THRESHOLDS), + .THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_AXILITE(USE_AXILITE) ) core ( .ap_clk, .ap_rst_n, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v index 3521987b66..13c8189f0e 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v @@ -41,6 +41,7 @@ module thresholding_axi_tpl_outer #( parameter FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa parameter BIAS = $BIAS$, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] + parameter THRESHOLDS_PATH = $THRESHOLDS_PATH$, // Directory with initial threshold data parameter USE_AXILITE = $USE_AXILITE$, // Implement AXI-Lite for threshold read/write parameter O_BITS = $O_BITS$ @@ -93,6 +94,7 @@ module thresholding_axi_tpl_outer #( .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS), + .THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_AXILITE(USE_AXILITE) ) core ( .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), diff --git a/finn-rtllib/thresholding/sim/thresh_gen.sv b/finn-rtllib/thresholding/sim/thresh_gen.sv new file mode 100644 index 0000000000..a8a18be691 --- /dev/null +++ b/finn-rtllib/thresholding/sim/thresh_gen.sv @@ -0,0 +1,45 @@ +module thresh_gen; + localparam int unsigned K = 9; + localparam int unsigned N = 4; + localparam int unsigned C = 6; + + typedef logic [K-1:0] thresh_t; + localparam thresh_t THRESHOLDS[C][2**N-1] = '{ + '{ 'h00, 'h01, 'h02, 'h03, 'h04, 'h05, 'h06, 'h07, 'h08, 'h09, 'h0a, 'h0b, 'h0c, 'h0d, 'h0e }, + '{ 'h10, 'h11, 'h12, 'h13, 'h14, 'h15, 'h16, 'h17, 'h18, 'h19, 
'h1a, 'h1b, 'h1c, 'h1d, 'h1e }, + '{ 'h20, 'h21, 'h22, 'h23, 'h24, 'h25, 'h26, 'h27, 'h28, 'h29, 'h2a, 'h2b, 'h2c, 'h2d, 'h2e }, + '{ 'h30, 'h31, 'h32, 'h33, 'h34, 'h35, 'h36, 'h37, 'h38, 'h39, 'h3a, 'h3b, 'h3c, 'h3d, 'h3e }, + '{ 'h40, 'h41, 'h42, 'h43, 'h44, 'h45, 'h46, 'h47, 'h48, 'h49, 'h4a, 'h4b, 'h4c, 'h4d, 'h4e }, + '{ 'h50, 'h51, 'h52, 'h53, 'h54, 'h55, 'h56, 'h57, 'h58, 'h59, 'h5a, 'h5b, 'h5c, 'h5d, 'h5e } + }; + localparam THRESHOLDS_PATH = "."; + + localparam int unsigned PE = 2; + localparam int unsigned CF = C/PE; + + for(genvar stage = 0; stage < N; stage++) begin + localparam int unsigned SN = N-1-stage; + for(genvar pe = 0; pe < PE; pe++) begin + initial begin + automatic string file = $sformatf("%s/threshs_%0d_%0d.dat", THRESHOLDS_PATH, pe, stage); + + automatic thresh_t threshs[CF * 2**stage]; + for(int unsigned c = 0; c < CF; c++) begin + for(int unsigned i = 0; i < 2**stage; i++) begin + threshs[(c << stage) + i] = THRESHOLDS[c*PE + pe][(i<<(N-stage)) + 2**SN-1]; + end + end + + $writememh(file, threshs); + end + end + end + + // Quit after running all initializers + initial begin + #1ns; + $display("Generation done."); + $finish; + end + +endmodule : thresh_gen diff --git a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv index 926c318adc..918f539d15 100644 --- a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv +++ b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv @@ -57,7 +57,6 @@ module thresholding_axi_tb #( localparam int unsigned C_BITS = C < 2? 1 : $clog2(C); localparam int unsigned MST_STRM_WROUNDS = 503; - localparam bit DYNAMIC_CONFIG = 0; typedef int unsigned threshs_t[C][2**N-1]; function threshs_t init_thresholds(); @@ -111,8 +110,7 @@ module thresholding_axi_tb #( uwire ovld; uwire [PE-1:0][N-1:0] odat; - localparam threshs_t THRESHS_STATIC = DYNAMIC_CONFIG? 
'{ default: '{ default: 'x } } : THRESHS; - thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0), .THRESHOLDS(THRESHS_STATIC), .USE_AXILITE(1)) dut ( + thresholding_axi #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(0), .USE_AXILITE(1)) dut ( .ap_clk(clk), .ap_rst_n(!rst), // Configuration @@ -161,43 +159,41 @@ module thresholding_axi_tb #( @(posedge clk iff !rst); // Threshold Configuration - if(DYNAMIC_CONFIG) begin : blkConfig - for(int unsigned c = 0; c < C; c+=PE) begin - automatic addr_t addr = 0; - if(CF > 1) addr[N+$clog2(PE)+:$clog2(CF)] = c/PE; - for(int unsigned pe = 0; pe < PE; pe++) begin - if(PE > 1) addr[N+:$clog2(PE)] = pe; - for(int unsigned t = 0; t < 2**N-1; t++) begin - addr[0+:N] = t; - fork - begin - s_axilite_AWVALID <= 1; - s_axilite_AWADDR <= { addr, 2'b00 }; - @(posedge clk iff s_axilite_AWREADY); - s_axilite_AWVALID <= 0; - s_axilite_AWADDR <= 'x; - end - begin - s_axilite_WVALID <= 1; - s_axilite_WDATA <= THRESHS[c+pe][t]; - @(posedge clk iff s_axilite_WREADY); - s_axilite_WVALID <= 0; - s_axilite_WDATA <= 'x; - end - begin - s_axilite_BREADY <= 1; - @(posedge clk iff s_axilite_BVALID); - assert(s_axilite_BRESP == '0) else begin - $error("Error on parameter write."); - $stop; - end - s_axilite_BREADY <= 0; + for(int unsigned c = 0; c < C; c+=PE) begin + automatic addr_t addr = 0; + if(CF > 1) addr[N+$clog2(PE)+:$clog2(CF)] = c/PE; + for(int unsigned pe = 0; pe < PE; pe++) begin + if(PE > 1) addr[N+:$clog2(PE)] = pe; + for(int unsigned t = 0; t < 2**N-1; t++) begin + addr[0+:N] = t; + fork + begin + s_axilite_AWVALID <= 1; + s_axilite_AWADDR <= { addr, 2'b00 }; + @(posedge clk iff s_axilite_AWREADY); + s_axilite_AWVALID <= 0; + s_axilite_AWADDR <= 'x; + end + begin + s_axilite_WVALID <= 1; + s_axilite_WDATA <= THRESHS[c+pe][t]; + @(posedge clk iff s_axilite_WREADY); + s_axilite_WVALID <= 0; + s_axilite_WDATA <= 'x; + end + begin + s_axilite_BREADY <= 1; + @(posedge clk iff s_axilite_BVALID); + assert(s_axilite_BRESP == '0) else begin + 
$error("Error on parameter write."); + $stop; end - join - end + s_axilite_BREADY <= 0; + end + join end end - end : blkConfig + end fork // Intermittent configuration readback From 9de06be72165d7b2a8bc5d87ca2e2ad0cfca1fd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 28 Nov 2023 07:37:21 +0000 Subject: [PATCH 107/111] Adding parameters to request local memories of a given depth to be mapped to BRAM or URAM. --- finn-rtllib/thresholding/hdl/thresholding.sv | 11 ++++++++++- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 7 ++++++- .../thresholding/hdl/thresholding_axi_tpl_inner.sv | 8 +++++++- .../thresholding/hdl/thresholding_axi_tpl_outer.v | 8 +++++++- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index ff3d4172ab..ff801ac7b9 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -65,6 +65,10 @@ module thresholding #( parameter THRESHOLDS_PATH = "", bit USE_CONFIG = 1, + // Force Use of On-Chip Memory Blocks + int unsigned DEPTH_TRIGGER_URAM = 0, // if non-zero, local mems of this depth or more go into URAM (prio) + int unsigned DEPTH_TRIGGER_BRAM = 0, // if non-zero, local mems of this depth or more go into BRAM + localparam int unsigned CF = C/PE, // Channel fold localparam int unsigned O_BITS = BIAS >= 0? /* unsigned */ $clog2(2**N+BIAS) : @@ -193,8 +197,13 @@ module thresholding #( // Threshold Memory val_t Thresh; // Read-out register if(1) begin : blkThresh + localparam int unsigned DEPTH = CF * 2**stage; + localparam RAM_STYLE = + DEPTH_TRIGGER_URAM && (DEPTH >= DEPTH_TRIGGER_URAM)? "ultra" : + DEPTH_TRIGGER_BRAM && (DEPTH >= DEPTH_TRIGGER_BRAM)? 
"block" : "auto"; - val_t Threshs[CF * 2**stage]; + (* RAM_STYLE = RAM_STYLE *) + val_t Threshs[DEPTH]; if(THRESHOLDS_PATH != "") begin localparam FILE = $sformatf("%s/threshs_%0d_%0d.dat", THRESHOLDS_PATH, pe, stage); initial $readmemh(FILE, Threshs); diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 69617a20d9..1254d71750 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -53,6 +53,10 @@ module thresholding_axi #( bit USE_AXILITE, // Implement AXI-Lite for threshold read/write + // Force Use of On-Chip Memory Blocks + int unsigned DEPTH_TRIGGER_URAM = 0, // if non-zero, local mems of this depth or more go into URAM (prio) + int unsigned DEPTH_TRIGGER_BRAM = 0, // if non-zero, local mems of this depth or more go into BRAM + localparam int unsigned CF = C/PE, // Channel Fold localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, localparam int unsigned O_BITS = BIAS >= 0? 
@@ -143,7 +147,8 @@ module thresholding_axi #( thresholding #( .N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS), - .THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_CONFIG(USE_AXILITE) + .THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_CONFIG(USE_AXILITE), + .DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM) ) impl ( .clk(ap_clk), .rst(!ap_rst_n), diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv index b1350a9f31..d1c5333ebf 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv @@ -44,6 +44,10 @@ module thresholding_axi_tpl_inner #( parameter THRESHOLDS_PATH = "../../../data", bit USE_AXILITE = 1, // Implement AXI-Lite for threshold read/write + // Force Use of On-Chip Memory Blocks + int unsigned DEPTH_TRIGGER_URAM = 0, // if non-zero, local mems of this depth or more go into URAM (prio) + int unsigned DEPTH_TRIGGER_BRAM = 0, // if non-zero, local mems of this depth or more go into BRAM + localparam int unsigned CF = C/PE, // Channel Fold localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, localparam int unsigned O_BITS = BIAS >= 0? 
@@ -96,7 +100,9 @@ module thresholding_axi_tpl_inner #( .FPARG(FPARG), .BIAS(BIAS), .THRESHOLDS_PATH(THRESHOLDS_PATH), - .USE_AXILITE(USE_AXILITE) + .USE_AXILITE(USE_AXILITE), + .DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), + .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM) ) core ( .ap_clk, .ap_rst_n, diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v index 13c8189f0e..ff2d0c3c74 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v +++ b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v @@ -44,6 +44,10 @@ module thresholding_axi_tpl_outer #( parameter THRESHOLDS_PATH = $THRESHOLDS_PATH$, // Directory with initial threshold data parameter USE_AXILITE = $USE_AXILITE$, // Implement AXI-Lite for threshold read/write + // Force Use of On-Chip Memory Blocks + parameter DEPTH_TRIGGER_URAM = $DEPTH_TRIGGER_URAM$, // if non-zero, local mems of this depth or more go into URAM (prio) + parameter DEPTH_TRIGGER_BRAM = $DEPTH_TRIGGER_BRAM$, // if non-zero, local mems of this depth or more go into BRAM + parameter O_BITS = $O_BITS$ )( // Global Control @@ -95,7 +99,9 @@ module thresholding_axi_tpl_outer #( .FPARG(FPARG), .BIAS(BIAS), .THRESHOLDS_PATH(THRESHOLDS_PATH), - .USE_AXILITE(USE_AXILITE) + .USE_AXILITE(USE_AXILITE), + .DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), + .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM) ) core ( .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), From 038a58a80dddc47365a69d429123425f59f6f810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Wed, 29 Nov 2023 09:40:05 +0000 Subject: [PATCH 108/111] Prevent BRAM use below specified trigger. 
--- finn-rtllib/thresholding/hdl/thresholding.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index ff801ac7b9..8f862c7bf2 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -200,7 +200,9 @@ module thresholding #( localparam int unsigned DEPTH = CF * 2**stage; localparam RAM_STYLE = DEPTH_TRIGGER_URAM && (DEPTH >= DEPTH_TRIGGER_URAM)? "ultra" : - DEPTH_TRIGGER_BRAM && (DEPTH >= DEPTH_TRIGGER_BRAM)? "block" : "auto"; + DEPTH_TRIGGER_BRAM && (DEPTH >= DEPTH_TRIGGER_BRAM)? "block" : + // If a BRAM trigger is defined, force distributed memory for depths below it (from 64 entries up), where Vivado may be tempted to use BRAM nonetheless. + DEPTH_TRIGGER_BRAM && (DEPTH >= 64)? "distributed" : "auto"; (* RAM_STYLE = RAM_STYLE *) val_t Threshs[DEPTH]; From f39187c2c8ee6952755fec3753d5107bfd1e48e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Fri, 1 Dec 2023 09:00:20 +0000 Subject: [PATCH 109/111] Fixing N vs. 2^N confusion in the module description. --- finn-rtllib/thresholding/hdl/thresholding.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 8f862c7bf2..4e1de356c2 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -44,11 +44,11 @@ * accompanied by a channel selector. * * Parameter Layout as seen on AXI-Lite (row by row): - * | Base \ Offs | 0 1 2 ... N-2 N-1 - * ---------+------------------------------+---------------------------------- - * Chnl #0 | 0 | T_0 T_1 T_2 ... T_{N-2} 'x - * Chnl #1 | N | T_0 T_1 T_2 ... T_{N-2} 'x - * Chnl #c | ((c/PE)*$clog2(PE) + c%PE)*N | T_0 T_1 T_2 ... T_{N-2} 'x + * | Base \ Offs | 0 1 2 ... 
2^N-2 2^N-1 + * ---------+--------------------------------+------------------------------------ + * Chnl #0 | 0 | T_0 T_1 T_2 ... T_{2^N-2} 'x + * Chnl #1 | 2^N | T_0 T_1 T_2 ... T_{2^N-2} 'x + * Chnl #c | ((c/PE)*2^$clog2(PE) + c%PE)*2^N | T_0 T_1 T_2 ... T_{2^N-2} 'x * *****************************************************************************/ module thresholding #( From 7284d2c6bf0210dd76294acc34e1d7ff8378db87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Wed, 6 Dec 2023 11:02:25 +0000 Subject: [PATCH 110/111] Add deep pipelining option to thresholding implementation. --- finn-rtllib/thresholding/hdl/thresholding.sv | 28 +++++++++++++++---- .../thresholding/sim/thresholding_tb.sv | 8 ++++-- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding.sv b/finn-rtllib/thresholding/hdl/thresholding.sv index 4e1de356c2..75fbb61a4d 100644 --- a/finn-rtllib/thresholding/hdl/thresholding.sv +++ b/finn-rtllib/thresholding/hdl/thresholding.sv @@ -68,6 +68,7 @@ module thresholding #( // Force Use of On-Chip Memory Blocks int unsigned DEPTH_TRIGGER_URAM = 0, // if non-zero, local mems of this depth or more go into URAM (prio) int unsigned DEPTH_TRIGGER_BRAM = 0, // if non-zero, local mems of this depth or more go into BRAM + bit DEEP_PIPELINE = 0, localparam int unsigned CF = C/PE, // Channel fold localparam int unsigned O_BITS = BIAS >= 0? 
@@ -128,14 +129,15 @@ module thresholding #( // - configuration always takes precedence // - number of pending thresholding ops capped to N+3 // across pipeline and output FIFO: pipe:N + A:1 + B:1 + 1 + localparam int unsigned MAX_PENDING = (DEEP_PIPELINE+1)*N + 3; pipe_t pipe[PE][N+1]; if(1) begin : blkFeed // Thresholding Input Guard ensuring Output FIFO is never overrun - logic signed [$clog2(N+3):0] GuardSem = N+2; // N+2, N+1, ..., 0, -1 + logic signed [$clog2(MAX_PENDING):0] GuardSem = MAX_PENDING-1; // MAX_PENDING-1, ..., 0, -1 uwire th_full = GuardSem[$left(GuardSem)]; always_ff @(posedge clk) begin - if(rst) GuardSem <= N+2; + if(rst) GuardSem <= MAX_PENDING-1; else begin automatic logic dec = !(USE_CONFIG && cfg_en) && !th_full && ivld; automatic logic inc = ovld && ordy; @@ -268,13 +270,29 @@ module thresholding #( endcase end end : blkSignedFloat + + // Pipeline State Update + pipe_t pp; always_comb begin - automatic pipe_t pp = P; + pp = P; if(P.op !=? CFG) pp.ptr[SN] = cmp; if(Reval) pp.val = Thresh; - pipe[pe][stage+1] = pp; end + // Pipeline State Forward (potentially additional register) + pipe_t pf; + if(!DEEP_PIPELINE) assign pf = pp; + else begin + pipe_t Pf = '{ op: NOP, default: 'x }; + always_ff @(posedge clk) begin + if(rst) Pf <= '{ op: NOP, default: 'x }; + else Pf <= pp; + end + assign pf = Pf; + end + + assign pipe[pe][stage+1] = pf; + end : genPE end : genStages @@ -295,7 +313,7 @@ module thresholding #( // - Depth of N + Output Reg to allow pipe to drain entirely under backpressure // - Typically mapped to an SRL shift register if(1) begin : blkStreamOutput - localparam int unsigned A_DEPTH = N+2; + localparam int unsigned A_DEPTH = MAX_PENDING - 1; logic [PE-1 : 0][N-1 : 0] ADat[A_DEPTH]; logic signed [$clog2(A_DEPTH):0] APtr = '1; // -1, 0, 1, ..., A_DEPTH-1 uwire avld = !APtr[$left(APtr)]; diff --git a/finn-rtllib/thresholding/sim/thresholding_tb.sv b/finn-rtllib/thresholding/sim/thresholding_tb.sv index 20f3879422..e42145f10e 
100644 --- a/finn-rtllib/thresholding/sim/thresholding_tb.sv +++ b/finn-rtllib/thresholding/sim/thresholding_tb.sv @@ -41,6 +41,8 @@ module thresholding_tb #( localparam int unsigned CF = C/PE // Channel Fold ); + localparam bit DEEP_PIPELINE = 1; + localparam int unsigned MST_STRM_WROUNDS = 507; localparam bit THROTTLED = 1; @@ -85,7 +87,7 @@ module thresholding_tb #( uwire ovld; uwire [PE-1:0][N-1:0] odat; - thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .USE_CONFIG(1)) dut ( + thresholding #(.N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .USE_CONFIG(1), .DEEP_PIPELINE(DEEP_PIPELINE)) dut ( .clk, .rst, // Configuration @@ -165,7 +167,7 @@ module thresholding_tb #( cfg_we <= 'x; cfg_a <= 'x; @(posedge clk); - if(($urandom()%37) == 0) begin + if(($urandom()%41) == 0) begin automatic addr_t addr = $urandom()%(N-1); if(PE > 1) addr[N+:$clog2(PE)] = $urandom()%PE; if(CF > 1) addr[N+$clog2(PE)+:$clog2(CF)] = $urandom()%CF; @@ -194,7 +196,7 @@ module thresholding_tb #( end join_any done <= 1; - repeat(N+6) @(posedge clk); + repeat((DEEP_PIPELINE+1)*N+6) @(posedge clk); assert(QW.size() == 0) else begin $error("[%0d] Missing %0d outputs.", i, QW.size()); From aa57255b7b0f81b731d5a8c77230dff15c5fb065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Wed, 6 Dec 2023 17:09:36 +0000 Subject: [PATCH 111/111] Removing the inner wrapper. Exposing the DEEP_PIPELINE option and adjusting to established naming in FINN. 
--- .../thresholding/hdl/thresholding_axi.sv | 4 +- .../hdl/thresholding_axi_tpl_inner.sv | 119 ------------------ ...uter.v => thresholding_template_wrapper.v} | 28 +++-- 3 files changed, 18 insertions(+), 133 deletions(-) delete mode 100644 finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv rename finn-rtllib/thresholding/hdl/{thresholding_axi_tpl_outer.v => thresholding_template_wrapper.v} (85%) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 1254d71750..1f235b9486 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -56,6 +56,7 @@ module thresholding_axi #( // Force Use of On-Chip Memory Blocks int unsigned DEPTH_TRIGGER_URAM = 0, // if non-zero, local mems of this depth or more go into URAM (prio) int unsigned DEPTH_TRIGGER_BRAM = 0, // if non-zero, local mems of this depth or more go into BRAM + bit DEEP_PIPELINE = 0, localparam int unsigned CF = C/PE, // Channel Fold localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, @@ -148,7 +149,8 @@ module thresholding_axi #( .N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), .BIAS(BIAS), .THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_CONFIG(USE_AXILITE), - .DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM) + .DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM), + .DEEP_PIPELINE(DEEP_PIPELINE) ) impl ( .clk(ap_clk), .rst(!ap_rst_n), diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv b/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv deleted file mode 100644 index d1c5333ebf..0000000000 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_inner.sv +++ /dev/null @@ -1,119 +0,0 @@ -/** - * Copyright (c) 2023, Xilinx - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * * Neither the name of FINN nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * @author Thomas B. Preußer - * @brief Verilog wrapper for IP packaging. 
- */ - -module thresholding_axi_tpl_inner #( - int unsigned N = 4, // output precision - int unsigned K = 9, // input/threshold precision - int unsigned C = 6, // Channels - int unsigned PE = 2, // Processing Parallelism, requires C = k*PE - - int unsigned SIGNED = 1, // signed inputs - int unsigned FPARG = 0, // floating-point inputs: [sign] | exponent | mantissa - int BIAS = 0, // offsetting the output [0, 2^N-1] -> [BIAS, 2^N-1 + BIAS] - - parameter THRESHOLDS_PATH = "../../../data", - bit USE_AXILITE = 1, // Implement AXI-Lite for threshold read/write - - // Force Use of On-Chip Memory Blocks - int unsigned DEPTH_TRIGGER_URAM = 0, // if non-zero, local mems of this depth or more go into URAM (prio) - int unsigned DEPTH_TRIGGER_BRAM = 0, // if non-zero, local mems of this depth or more go into BRAM - - localparam int unsigned CF = C/PE, // Channel Fold - localparam int unsigned ADDR_BITS = $clog2(CF) + $clog2(PE) + N + 2, - localparam int unsigned O_BITS = BIAS >= 0? - /* unsigned */ $clog2(2**N+BIAS) : - /* signed */ 1+$clog2(-BIAS >= 2**(N-1)? 
-BIAS : 2**N+BIAS) -)( - // Global Control - input ap_clk, - input ap_rst_n, - - //- AXI Lite ------------------------ - // Writing - input s_axilite_AWVALID, - output s_axilite_AWREADY, - input [ADDR_BITS-1:0] s_axilite_AWADDR, // lowest 2 bits (byte selectors) are ignored - - input s_axilite_WVALID, - output s_axilite_WREADY, - input [31:0] s_axilite_WDATA, - input [ 3:0] s_axilite_WSTRB, - - output s_axilite_BVALID, - input s_axilite_BREADY, - output [1:0] s_axilite_BRESP, - - // Reading - input s_axilite_ARVALID, - output s_axilite_ARREADY, - input [ADDR_BITS-1:0] s_axilite_ARADDR, - - output s_axilite_RVALID, - input s_axilite_RREADY, - output [31:0] s_axilite_RDATA, - output [ 1:0] s_axilite_RRESP, - - //- AXI Stream - Input -------------- - output s_axis_tready, - input s_axis_tvalid, - input [((PE*K+7)/8)*8-1:0] s_axis_tdata, - - //- AXI Stream - Output ------------- - input m_axis_tready, - output m_axis_tvalid, - output [((PE*O_BITS+7)/8)*8-1:0] m_axis_tdata -); - - thresholding_axi #( - .N(N), .K(K), .C(C), .PE(PE), - .SIGNED(SIGNED), - .FPARG(FPARG), - .BIAS(BIAS), - .THRESHOLDS_PATH(THRESHOLDS_PATH), - .USE_AXILITE(USE_AXILITE), - .DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), - .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM) - ) core ( - .ap_clk, .ap_rst_n, - - .s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR, - .s_axilite_WVALID, .s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB, - .s_axilite_BVALID, .s_axilite_BREADY, .s_axilite_BRESP, - - .s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR, - .s_axilite_RVALID, .s_axilite_RREADY, .s_axilite_RDATA, .s_axilite_RRESP, - .s_axis_tready, .s_axis_tvalid, .s_axis_tdata, - .m_axis_tready, .m_axis_tvalid, .m_axis_tdata - ); - -endmodule : thresholding_axi_tpl_inner diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v b/finn-rtllib/thresholding/hdl/thresholding_template_wrapper.v similarity index 85% rename from finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v rename to 
finn-rtllib/thresholding/hdl/thresholding_template_wrapper.v index ff2d0c3c74..3f0b012ef1 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi_tpl_outer.v +++ b/finn-rtllib/thresholding/hdl/thresholding_template_wrapper.v @@ -31,7 +31,7 @@ * @brief Verilog wrapper for IP packaging. */ -module thresholding_axi_tpl_outer #( +module thresholding_template_wrapper #( parameter N = $N$, // output precision parameter K = $M$, // input/threshold precision parameter C = $C$, // Channels @@ -47,11 +47,12 @@ module thresholding_axi_tpl_outer #( // Force Use of On-Chip Memory Blocks parameter DEPTH_TRIGGER_URAM = $DEPTH_TRIGGER_URAM$, // if non-zero, local mems of this depth or more go into URAM (prio) parameter DEPTH_TRIGGER_BRAM = $DEPTH_TRIGGER_BRAM$, // if non-zero, local mems of this depth or more go into BRAM + parameter DEEP_PIPELINE = $DEEP_PIPELINE$, // [bit] extra pipeline stages for easier timing closure parameter O_BITS = $O_BITS$ )( // Global Control - (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF s_axilite:s_axis:m_axis, ASSOCIATED_RESET ap_rst_n" *) + (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF s_axilite:in0_V:out_V, ASSOCIATED_RESET ap_rst_n" *) (* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *) input ap_clk, (* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *) @@ -83,17 +84,17 @@ module thresholding_axi_tpl_outer #( output [ 1:0] s_axilite_RRESP, //- AXI Stream - Input -------------- - output s_axis_tready, - input s_axis_tvalid, - input [((PE*K+7)/8)*8-1:0] s_axis_tdata, + output in0_V_tready, + input in0_V_tvalid, + input [((PE*K+7)/8)*8-1:0] in0_V_tdata, //- AXI Stream - Output ------------- - input m_axis_tready, - output m_axis_tvalid, - output [((PE*O_BITS+7)/8)*8-1:0] m_axis_tdata + input out_V_tready, + output out_V_tvalid, + output [((PE*O_BITS+7)/8)*8-1:0] out_V_tdata ); - thresholding_axi_tpl_inner #( + thresholding_axi #( .N(N), .K(K), .C(C), .PE(PE), .SIGNED(SIGNED), .FPARG(FPARG), @@ -101,7 +102,8 @@ module 
thresholding_axi_tpl_outer #( .THRESHOLDS_PATH(THRESHOLDS_PATH), .USE_AXILITE(USE_AXILITE), .DEPTH_TRIGGER_URAM(DEPTH_TRIGGER_URAM), - .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM) + .DEPTH_TRIGGER_BRAM(DEPTH_TRIGGER_BRAM), + .DEEP_PIPELINE(DEEP_PIPELINE) ) core ( .ap_clk(ap_clk), .ap_rst_n(ap_rst_n), @@ -111,8 +113,8 @@ module thresholding_axi_tpl_outer #( .s_axilite_ARVALID(s_axilite_ARVALID), .s_axilite_ARREADY(s_axilite_ARREADY), .s_axilite_ARADDR(s_axilite_ARADDR), .s_axilite_RVALID(s_axilite_RVALID), .s_axilite_RREADY(s_axilite_RREADY), .s_axilite_RDATA(s_axilite_RDATA), .s_axilite_RRESP(s_axilite_RRESP), - .s_axis_tready(s_axis_tready), .s_axis_tvalid(s_axis_tvalid), .s_axis_tdata(s_axis_tdata), - .m_axis_tready(m_axis_tready), .m_axis_tvalid(m_axis_tvalid), .m_axis_tdata(m_axis_tdata) + .s_axis_tready(in0_V_tready), .s_axis_tvalid(in0_V_tvalid), .s_axis_tdata(in0_V_tdata), + .m_axis_tready(out_V_tready), .m_axis_tvalid(out_V_tvalid), .m_axis_tdata(out_V_tdata) ); -endmodule // thresholding_axi_tpl_outer +endmodule // thresholding_template_wrapper